#
# File for replicating all analyses in "Group Ties amid Industrial Change" (World Politics)
# Author: Noah Zucker (noah.zucker@columbia.edu)
#
# Software: R version 4.1.2 / RStudio version RStudio 2022.02.0+443 / macOS 12.3
# Hardware: MacBook Pro (16-inch, 2019) / Processor: 2.4 GHz 8-Core Intel Core i9 / RAM: 64 GB 2667 MHz DDR4
#
# Last edited May 9, 2022
#

# NB: This code is dependent upon data generated in the ipums.R file.

# BASIC GUIDE FOR MAIN DATASET (mlp_full):
# ----------
# Each row represents one immigrant matched between two consecutive censuses
# Variable suffix "_START" indicates value for immigrant at start of decade
# Variable suffix "_END" indicates value for immigrant at end of decade
# "0010" and "10" denote the range 1900-10 (1900: start, 1910: end); "1020" denotes the range 1910-20

# MAIN VARIABLES (all data originally from U.S. Census Bureau, as cleaned and provided by IPUMS USA, unless otherwise stated / see README):
# ---------
# GROUP_START: ethnic group for immigrant
# RANGE: decade (1900s-10, 1910s-1020)
# STATEICP_START: state (ICPSR code)
# SCICP_START: state and county (ICPSR codes)
# STATEICP_RANGE: combination of state and decade
# GROUP_SCICP: combination of ethnic group and county
# weight2_coal: regression weights
# GROUP_COAL_PCTGROUP_START: group concentration in coal at start of decade
# base_decline_magnitude: shock intensity for county's coal mining industry during decade (production data originally from U.S. Geological Survey)
# BARTIK_MEAN: Bartik estimate of decline in local non-coal economy during decade (production data originally from U.S. Census Bureau)
# NATURALIZED_END: binary indicating if immigrant naturalized by end of decade
# INTERMARRIAGE_NWHITE_END: binary indicating if immigrant married native white American by end of decade
# INTERMARRIAGE_IMMIG_END: binary indicating if immigrant married non-coethnic immigrant by end of decade
# SPEAKENG_END: binary indicating if immigrant spoke English by end of decade
# YRIMMIG_START: immigrant's first year of emigration to the US
# INCWAGE_MEDIAN_START: immigrant's estimated income from employment
# MALE: binary indicating if immigrant male or female
# MARRIED_START: binary indicating if immigrant was married and living with spouse at start of decade
# COAL_FAM_START: binary indicating if immigrant resided with anyone working in coal mining
# GROUP_PCTCOUNTY_START: coethnic population as % county population
# COAL_RELIANCE_START: % county working in coal mining
# RURAL_PCTCOUNTY_START: % county residing in rural areas
# BLACK_PCTCOUNTY_START: Black residents as % county population
# COAL_END: binary indicating whether immigrant employed in coal mining at end of decade
# NATURALIZING_END: binary indicating whether immigrant had begun naturalization process by end of decade
# AGE_START: immigrant age at start of decade
# IND1950_START: immigrant industry of employment at start of decade
# CITIZEN_START: immigrant citizenship status at start of decade

# ******************* #
##################### #
#### SESSION SETUP ####
##################### #
# ******************* #

rm(list = ls()); gc()

set.seed(1900)

#install.packages("pacman")
pacman::p_load(tidyverse,
               tidylog,
               AER,
               broom,
               data.table,
               DeclareDesign,
               ggpubr,
               ggthemes,
               ggmap,
               interflex,
               janitor,
               lfe,
               lmtest,
               magrittr,
               patchwork,
               pryr,
               RColorBrewer,
               readxl,
               rgdal,
               rgeos,
               sf,
               stargazer,
               texreg,
               xtable)

# > Interaction plot functions ####

felm_int_plot <- function(modelMin, modelFull, # creates interaction plots
                          ymin, ymax,
                          title = ""){
  .modelMin_covMat <- vcov(modelMin)
  .modelMin_mod_frame <- model.frame(modelMin)
  .modelMin_beta_1 <- modelMin$coefficients["GROUP_COAL_PCTGROUP_START_SQRT",]
  .modelMin_beta_3 <- modelMin$coefficients["base_decline_magnitude_sqrt:GROUP_COAL_PCTGROUP_START_SQRT",]
  .modelMin_min_val <- min(.modelMin_mod_frame$base_decline_magnitude_sqrt)
  .modelMin_max_val <- max(.modelMin_mod_frame$base_decline_magnitude_sqrt)
  .modelMin_increment = (.modelMin_max_val-.modelMin_min_val)/49
  .modelMin_x_2 <- seq(from = .modelMin_min_val, to = .modelMin_max_val, by = .modelMin_increment)
  .modelMin_delta_1 = .modelMin_beta_1 + .modelMin_beta_3*.modelMin_x_2
  .modelMin_var_1 = .modelMin_covMat["GROUP_COAL_PCTGROUP_START_SQRT","GROUP_COAL_PCTGROUP_START_SQRT"] +
    (.modelMin_x_2^2)*.modelMin_covMat["base_decline_magnitude_sqrt:GROUP_COAL_PCTGROUP_START_SQRT",
                                       "base_decline_magnitude_sqrt:GROUP_COAL_PCTGROUP_START_SQRT"] +
    2*.modelMin_x_2*.modelMin_covMat["GROUP_COAL_PCTGROUP_START_SQRT","base_decline_magnitude_sqrt:GROUP_COAL_PCTGROUP_START_SQRT"]
  .modelMin_se_1 = sqrt(.modelMin_var_1)
  .modelMin_z_score = qnorm(1 - ((1 - .95)/2))
  .modelMin_upper_bound = .modelMin_delta_1 + .modelMin_z_score*.modelMin_se_1
  .modelMin_lower_bound = .modelMin_delta_1 - .modelMin_z_score*.modelMin_se_1
  
  .modelFull_covMat <- vcov(modelFull)
  .modelFull_mod_frame <- model.frame(modelFull)
  .modelFull_beta_1 <- modelFull$coefficients["GROUP_COAL_PCTGROUP_START_SQRT",]
  .modelFull_beta_3 <- modelFull$coefficients["base_decline_magnitude_sqrt:GROUP_COAL_PCTGROUP_START_SQRT",]
  .modelFull_min_val <- min(.modelFull_mod_frame$base_decline_magnitude_sqrt)
  .modelFull_max_val <- max(.modelFull_mod_frame$base_decline_magnitude_sqrt)
  .modelFull_increment = (.modelFull_max_val-.modelFull_min_val)/49
  .modelFull_x_2 <- seq(from = .modelFull_min_val, to = .modelFull_max_val, by = .modelFull_increment)
  .modelFull_delta_1 = .modelFull_beta_1 + .modelFull_beta_3*.modelFull_x_2
  .modelFull_var_1 = .modelFull_covMat["GROUP_COAL_PCTGROUP_START_SQRT","GROUP_COAL_PCTGROUP_START_SQRT"] +
    (.modelFull_x_2^2)*.modelFull_covMat["base_decline_magnitude_sqrt:GROUP_COAL_PCTGROUP_START_SQRT",
                                         "base_decline_magnitude_sqrt:GROUP_COAL_PCTGROUP_START_SQRT"] +
    2*.modelFull_x_2*.modelFull_covMat["GROUP_COAL_PCTGROUP_START_SQRT","base_decline_magnitude_sqrt:GROUP_COAL_PCTGROUP_START_SQRT"]
  .modelFull_se_1 = sqrt(.modelFull_var_1)
  .modelFull_z_score = qnorm(1 - ((1 - .95)/2))
  .modelFull_upper_bound = .modelFull_delta_1 + .modelFull_z_score*.modelFull_se_1
  .modelFull_lower_bound = .modelFull_delta_1 - .modelFull_z_score*.modelFull_se_1
  
  ggplot() +
    geom_hline(aes(yintercept = 0), lty = 5, color = "gray90") +
    geom_line(aes(x = .modelMin_x_2,
                  y = .modelMin_upper_bound), lty = 2, color = "gray50") +
    geom_line(aes(x = .modelMin_x_2,
                  y = .modelMin_lower_bound), lty = 2, color = "gray50") +
    geom_ribbon(aes(x = .modelFull_x_2,
                    ymin = .modelFull_lower_bound,
                    ymax = .modelFull_upper_bound), fill = "black", alpha = .2) +
    geom_line(aes(x = .modelFull_x_2,
                  y = .modelFull_delta_1)) +
    ylab("Marginal effect of group concentration") +
    xlab("Shock intensity") +
    ggtitle(title) +
    ylim(ymin, ymax) +
    theme_classic() +
    theme(plot.title = element_text(size = 10, face = "bold"),
          axis.title = element_text(size = 10))
}

felm_int_plot_for_rail_test <- function(modelMin, modelFull, # creates interaction plots for rail-connected test
                                        ymin, ymax,
                                        title = ""){
  .modelMin_covMat <- vcov(modelMin)
  .modelMin_mod_frame <- model.frame(modelMin)
  .modelMin_beta_1 <- modelMin$coefficients["connected_GROUP_COAL_PCTGROUP_START_SQRT",]
  .modelMin_beta_3 <- modelMin$coefficients["base_decline_magnitude_yes_connection_sqrt:connected_GROUP_COAL_PCTGROUP_START_SQRT",]
  .modelMin_min_val <- min(.modelMin_mod_frame$base_decline_magnitude_yes_connection_sqrt)
  .modelMin_max_val <- max(.modelMin_mod_frame$base_decline_magnitude_yes_connection_sqrt)
  .modelMin_increment = (.modelMin_max_val-.modelMin_min_val)/49
  .modelMin_x_2 <- seq(from = .modelMin_min_val, to = .modelMin_max_val, by = .modelMin_increment)
  .modelMin_delta_1 = .modelMin_beta_1 + .modelMin_beta_3*.modelMin_x_2
  .modelMin_var_1 = .modelMin_covMat["connected_GROUP_COAL_PCTGROUP_START_SQRT","connected_GROUP_COAL_PCTGROUP_START_SQRT"] +
    (.modelMin_x_2^2)*.modelMin_covMat["base_decline_magnitude_yes_connection_sqrt:connected_GROUP_COAL_PCTGROUP_START_SQRT",
                                       "base_decline_magnitude_yes_connection_sqrt:connected_GROUP_COAL_PCTGROUP_START_SQRT"] +
    2*.modelMin_x_2*.modelMin_covMat["connected_GROUP_COAL_PCTGROUP_START_SQRT","base_decline_magnitude_yes_connection_sqrt:connected_GROUP_COAL_PCTGROUP_START_SQRT"]
  .modelMin_se_1 = sqrt(.modelMin_var_1)
  .modelMin_z_score = qnorm(1 - ((1 - .95)/2))
  .modelMin_upper_bound = .modelMin_delta_1 + .modelMin_z_score*.modelMin_se_1
  .modelMin_lower_bound = .modelMin_delta_1 - .modelMin_z_score*.modelMin_se_1
  
  .modelFull_covMat <- vcov(modelFull)
  .modelFull_mod_frame <- model.frame(modelFull)
  .modelFull_beta_1 <- modelFull$coefficients["connected_GROUP_COAL_PCTGROUP_START_SQRT",]
  .modelFull_beta_3 <- modelFull$coefficients["base_decline_magnitude_yes_connection_sqrt:connected_GROUP_COAL_PCTGROUP_START_SQRT",]
  .modelFull_min_val <- min(.modelFull_mod_frame$base_decline_magnitude_yes_connection_sqrt)
  .modelFull_max_val <- max(.modelFull_mod_frame$base_decline_magnitude_yes_connection_sqrt)
  .modelFull_increment = (.modelFull_max_val-.modelFull_min_val)/49
  .modelFull_x_2 <- seq(from = .modelFull_min_val, to = .modelFull_max_val, by = .modelFull_increment)
  .modelFull_delta_1 = .modelFull_beta_1 + .modelFull_beta_3*.modelFull_x_2
  .modelFull_var_1 = .modelFull_covMat["connected_GROUP_COAL_PCTGROUP_START_SQRT","connected_GROUP_COAL_PCTGROUP_START_SQRT"] +
    (.modelFull_x_2^2)*.modelFull_covMat["base_decline_magnitude_yes_connection_sqrt:connected_GROUP_COAL_PCTGROUP_START_SQRT",
                                         "base_decline_magnitude_yes_connection_sqrt:connected_GROUP_COAL_PCTGROUP_START_SQRT"] +
    2*.modelFull_x_2*.modelFull_covMat["connected_GROUP_COAL_PCTGROUP_START_SQRT","base_decline_magnitude_yes_connection_sqrt:connected_GROUP_COAL_PCTGROUP_START_SQRT"]
  .modelFull_se_1 = sqrt(.modelFull_var_1)
  .modelFull_z_score = qnorm(1 - ((1 - .95)/2))
  .modelFull_upper_bound = .modelFull_delta_1 + .modelFull_z_score*.modelFull_se_1
  .modelFull_lower_bound = .modelFull_delta_1 - .modelFull_z_score*.modelFull_se_1
  
  ggplot() +
    geom_hline(aes(yintercept = 0), lty = 5, color = "gray90") +
    geom_line(aes(x = .modelMin_x_2,
                  y = .modelMin_upper_bound), lty = 2, color = "gray50") +
    geom_line(aes(x = .modelMin_x_2,
                  y = .modelMin_lower_bound), lty = 2, color = "gray50") +
    geom_ribbon(aes(x = .modelFull_x_2,
                    ymin = .modelFull_lower_bound,
                    ymax = .modelFull_upper_bound), fill = "black", alpha = .2) +
    geom_line(aes(x = .modelFull_x_2,
                  y = .modelFull_delta_1)) +
    ylab("Marginal effect of group concentration") +
    xlab("Shock intensity") +
    ggtitle(title) +
    ylim(ymin, ymax) +
    theme_classic() +
    theme(plot.title = element_text(size = 10, face = "bold"),
          axis.title = element_text(size = 10))
}

felm_int_plot_for_noStrikes <- function(modelMin, modelFull, # creates interaction plots for test excluding major strikes
                                        ymin, ymax,
                                        title = ""){
  .modelMin_covMat <- vcov(modelMin)
  .modelMin_mod_frame <- model.frame(modelMin)
  .modelMin_beta_1 <- modelMin$coefficients["GROUP_COAL_PCTGROUP_START_SQRT",]
  .modelMin_beta_3 <- modelMin$coefficients["base_decline_magnitude_noStrikes_sqrt:GROUP_COAL_PCTGROUP_START_SQRT",]
  .modelMin_min_val <- min(.modelMin_mod_frame$base_decline_magnitude_noStrikes_sqrt)
  .modelMin_max_val <- max(.modelMin_mod_frame$base_decline_magnitude_noStrikes_sqrt)
  .modelMin_increment = (.modelMin_max_val-.modelMin_min_val)/49
  .modelMin_x_2 <- seq(from = .modelMin_min_val, to = .modelMin_max_val, by = .modelMin_increment)
  .modelMin_delta_1 = .modelMin_beta_1 + .modelMin_beta_3*.modelMin_x_2
  .modelMin_var_1 = .modelMin_covMat["GROUP_COAL_PCTGROUP_START_SQRT","GROUP_COAL_PCTGROUP_START_SQRT"] +
    (.modelMin_x_2^2)*.modelMin_covMat["base_decline_magnitude_noStrikes_sqrt:GROUP_COAL_PCTGROUP_START_SQRT",
                                       "base_decline_magnitude_noStrikes_sqrt:GROUP_COAL_PCTGROUP_START_SQRT"] +
    2*.modelMin_x_2*.modelMin_covMat["GROUP_COAL_PCTGROUP_START_SQRT","base_decline_magnitude_noStrikes_sqrt:GROUP_COAL_PCTGROUP_START_SQRT"]
  .modelMin_se_1 = sqrt(.modelMin_var_1)
  .modelMin_z_score = qnorm(1 - ((1 - .95)/2))
  .modelMin_upper_bound = .modelMin_delta_1 + .modelMin_z_score*.modelMin_se_1
  .modelMin_lower_bound = .modelMin_delta_1 - .modelMin_z_score*.modelMin_se_1
  
  .modelFull_covMat <- vcov(modelFull)
  .modelFull_mod_frame <- model.frame(modelFull)
  .modelFull_beta_1 <- modelFull$coefficients["GROUP_COAL_PCTGROUP_START_SQRT",]
  .modelFull_beta_3 <- modelFull$coefficients["base_decline_magnitude_noStrikes_sqrt:GROUP_COAL_PCTGROUP_START_SQRT",]
  .modelFull_min_val <- min(.modelFull_mod_frame$base_decline_magnitude_noStrikes_sqrt)
  .modelFull_max_val <- max(.modelFull_mod_frame$base_decline_magnitude_noStrikes_sqrt)
  .modelFull_increment = (.modelFull_max_val-.modelFull_min_val)/49
  .modelFull_x_2 <- seq(from = .modelFull_min_val, to = .modelFull_max_val, by = .modelFull_increment)
  .modelFull_delta_1 = .modelFull_beta_1 + .modelFull_beta_3*.modelFull_x_2
  .modelFull_var_1 = .modelFull_covMat["GROUP_COAL_PCTGROUP_START_SQRT","GROUP_COAL_PCTGROUP_START_SQRT"] +
    (.modelFull_x_2^2)*.modelFull_covMat["base_decline_magnitude_noStrikes_sqrt:GROUP_COAL_PCTGROUP_START_SQRT",
                                         "base_decline_magnitude_noStrikes_sqrt:GROUP_COAL_PCTGROUP_START_SQRT"] +
    2*.modelFull_x_2*.modelFull_covMat["GROUP_COAL_PCTGROUP_START_SQRT","base_decline_magnitude_noStrikes_sqrt:GROUP_COAL_PCTGROUP_START_SQRT"]
  .modelFull_se_1 = sqrt(.modelFull_var_1)
  .modelFull_z_score = qnorm(1 - ((1 - .95)/2))
  .modelFull_upper_bound = .modelFull_delta_1 + .modelFull_z_score*.modelFull_se_1
  .modelFull_lower_bound = .modelFull_delta_1 - .modelFull_z_score*.modelFull_se_1
  
  ggplot() +
    geom_hline(aes(yintercept = 0), lty = 5, color = "gray90") +
    geom_line(aes(x = .modelMin_x_2,
                  y = .modelMin_upper_bound), lty = 2, color = "gray50") +
    geom_line(aes(x = .modelMin_x_2,
                  y = .modelMin_lower_bound), lty = 2, color = "gray50") +
    geom_ribbon(aes(x = .modelFull_x_2,
                    ymin = .modelFull_lower_bound,
                    ymax = .modelFull_upper_bound), fill = "black", alpha = .2) +
    geom_line(aes(x = .modelFull_x_2,
                  y = .modelFull_delta_1)) +
    ylab("Marginal effect of group concentration") +
    xlab("Shock intensity") +
    ggtitle(title) +
    ylim(ymin, ymax) +
    theme_classic() +
    theme(plot.title = element_text(size = 10, face = "bold"),
          axis.title = element_text(size = 10))
}

felm_int_plot_for_ln <- function(modelMin, modelFull, # creates interaction plots for test with log transformations
                                 ymin, ymax,
                                 title = ""){
  .modelMin_covMat <- vcov(modelMin)
  .modelMin_mod_frame <- model.frame(modelMin)
  .modelMin_beta_1 <- modelMin$coefficients["GROUP_COAL_PCTGROUP_START_LN",]
  .modelMin_beta_3 <- modelMin$coefficients["base_decline_magnitude_ln:GROUP_COAL_PCTGROUP_START_LN",]
  .modelMin_min_val <- min(.modelMin_mod_frame$base_decline_magnitude_ln)
  .modelMin_max_val <- max(.modelMin_mod_frame$base_decline_magnitude_ln)
  .modelMin_increment = (.modelMin_max_val-.modelMin_min_val)/49
  .modelMin_x_2 <- seq(from = .modelMin_min_val, to = .modelMin_max_val, by = .modelMin_increment)
  .modelMin_delta_1 = .modelMin_beta_1 + .modelMin_beta_3*.modelMin_x_2
  .modelMin_var_1 = .modelMin_covMat["GROUP_COAL_PCTGROUP_START_LN","GROUP_COAL_PCTGROUP_START_LN"] +
    (.modelMin_x_2^2)*.modelMin_covMat["base_decline_magnitude_ln:GROUP_COAL_PCTGROUP_START_LN",
                                       "base_decline_magnitude_ln:GROUP_COAL_PCTGROUP_START_LN"] +
    2*.modelMin_x_2*.modelMin_covMat["GROUP_COAL_PCTGROUP_START_LN","base_decline_magnitude_ln:GROUP_COAL_PCTGROUP_START_LN"]
  .modelMin_se_1 = sqrt(.modelMin_var_1)
  .modelMin_z_score = qnorm(1 - ((1 - .95)/2))
  .modelMin_upper_bound = .modelMin_delta_1 + .modelMin_z_score*.modelMin_se_1
  .modelMin_lower_bound = .modelMin_delta_1 - .modelMin_z_score*.modelMin_se_1
  
  .modelFull_covMat <- vcov(modelFull)
  .modelFull_mod_frame <- model.frame(modelFull)
  .modelFull_beta_1 <- modelFull$coefficients["GROUP_COAL_PCTGROUP_START_LN",]
  .modelFull_beta_3 <- modelFull$coefficients["base_decline_magnitude_ln:GROUP_COAL_PCTGROUP_START_LN",]
  .modelFull_min_val <- min(.modelFull_mod_frame$base_decline_magnitude_ln)
  .modelFull_max_val <- max(.modelFull_mod_frame$base_decline_magnitude_ln)
  .modelFull_increment = (.modelFull_max_val-.modelFull_min_val)/49
  .modelFull_x_2 <- seq(from = .modelFull_min_val, to = .modelFull_max_val, by = .modelFull_increment)
  .modelFull_delta_1 = .modelFull_beta_1 + .modelFull_beta_3*.modelFull_x_2
  .modelFull_var_1 = .modelFull_covMat["GROUP_COAL_PCTGROUP_START_LN","GROUP_COAL_PCTGROUP_START_LN"] +
    (.modelFull_x_2^2)*.modelFull_covMat["base_decline_magnitude_ln:GROUP_COAL_PCTGROUP_START_LN",
                                         "base_decline_magnitude_ln:GROUP_COAL_PCTGROUP_START_LN"] +
    2*.modelFull_x_2*.modelFull_covMat["GROUP_COAL_PCTGROUP_START_LN","base_decline_magnitude_ln:GROUP_COAL_PCTGROUP_START_LN"]
  .modelFull_se_1 = sqrt(.modelFull_var_1)
  .modelFull_z_score = qnorm(1 - ((1 - .95)/2))
  .modelFull_upper_bound = .modelFull_delta_1 + .modelFull_z_score*.modelFull_se_1
  .modelFull_lower_bound = .modelFull_delta_1 - .modelFull_z_score*.modelFull_se_1
  
  ggplot() +
    geom_hline(aes(yintercept = 0), lty = 5, color = "gray90") +
    geom_line(aes(x = .modelMin_x_2,
                  y = .modelMin_upper_bound), lty = 2, color = "gray50") +
    geom_line(aes(x = .modelMin_x_2,
                  y = .modelMin_lower_bound), lty = 2, color = "gray50") +
    geom_ribbon(aes(x = .modelFull_x_2,
                    ymin = .modelFull_lower_bound,
                    ymax = .modelFull_upper_bound), fill = "black", alpha = .2) +
    geom_line(aes(x = .modelFull_x_2,
                  y = .modelFull_delta_1)) +
    ylab("Marginal effect of group concentration") +
    xlab("Shock intensity") +
    ggtitle(title) +
    ylim(ymin, ymax) +
    theme_classic() +
    theme(plot.title = element_text(size = 10, face = "bold"),
          axis.title = element_text(size = 10))
}

# ************************************** #
######################################## #
#### *** DATA LOADING & FILTERING *** ####
######################################## #
# ************************************** #

# Loading full dataset
mlp_full <- fread("mlp_full.csv") # data file created in ipums.R
mlp_full <- mlp_full[GROUP_START != ""] # excluding observations with unidentified ethnic group (likely non-European immigrants)

# Subsetting to immigrant miners, eliminating cases with missing data for certain regression analyses
mlp_coal_v2 <- mlp_full[CITIZEN_START == 3 & IND1950_START == 216 & GROUP_START != "" & GROUP_START != "nativeblack" & GROUP_START != "nativewhite" &
                          AGE_START >= 21 & !is.na(NATURALIZING_END) & !is.na(base_decline_magnitude) & !is.na(GROUP_COAL_PCTGROUP_START_SQRT)]
mlp_coal_wRHS_v2 <- mlp_full[CITIZEN_START == 3 & IND1950_START == 216 & GROUP_START != "" & GROUP_START != "nativeblack" & GROUP_START != "nativewhite" &
                               AGE_START >= 21 & !is.na(NATURALIZING_END) & !is.na(base_decline_magnitude) & !is.na(GROUP_COAL_PCTGROUP_START_SQRT) & !is.na(YRIMMIG_START) & !is.na(INCWAGE_MEDIAN_START_LN) & !is.na(RURAL_COUNTY_START)]

# Subsetting to coal-adjacent immigrants, eliminating cases with missing data for certain regression analyses
mlp_noncoal_v2 <- mlp_full[CITIZEN_START == 3 & COAL_FAM_START == FALSE & IND1950_START != 216 & GROUP_START != "" & GROUP_START != "nativeblack" & GROUP_START != "nativewhite" &
                             AGE_START >= 21 & !is.na(NATURALIZED_END) & !is.na(base_decline_magnitude) & !is.na(GROUP_COAL_PCTGROUP_START_SQRT)]
mlp_noncoal_wRHS_v2 <- mlp_full[CITIZEN_START == 3 & COAL_FAM_START == FALSE & IND1950_START != 216 & GROUP_START != "" & GROUP_START != "nativeblack" & GROUP_START != "nativewhite" &
                                  AGE_START >= 21 & !is.na(NATURALIZING_END) & !is.na(base_decline_magnitude) & !is.na(GROUP_COAL_PCTGROUP_START_SQRT) & !is.na(YRIMMIG_START) & !is.na(INCWAGE_MEDIAN_START_LN) & !is.na(RURAL_COUNTY_START)]

# Indicating whether immigrant is employed in coal at end of decade
mlp_coal_v2 <- mlp_coal_v2[, COAL_END := as.integer(IND1950_END == 216)]
mlp_noncoal_v2 <- mlp_noncoal_v2[, COAL_END := as.integer(IND1950_END == 216)]

# Adding log transformations
mlp_full <- mlp_full[, base_decline_magnitude_ln := log1p(base_decline_magnitude)]
mlp_full <- mlp_full[, GROUP_COAL_PCTGROUP_START_LN := log1p(GROUP_COAL_PCTGROUP_START)]
mlp_full <- mlp_full[, BARTIK_MEAN_LN := log1p(BARTIK_MEAN)]
mlp_full <- mlp_full[, GROUP_PCTCOUNTY_START_LN := log1p(GROUP_PCTCOUNTY_START)]
mlp_full <- mlp_full[, COAL_RELIANCE_START_LN := log1p(COAL_RELIANCE_START)]
mlp_full <- mlp_full[, BLACK_PCTCOUNTY_START_LN := log1p(BLACK_PCTCOUNTY_START)]

# ***************************************************** #
####################################################### #
#### *** STATISTICAL ANALYSES IN MAIN MANUSCRIPT *** ####
####################################################### #
# ***************************************************** #

# > Figure 1: theoretical depiction of enclaves ####

group1 <- tibble(x = rep(1:10, 10),
                 red = runif(100, 0, 1)) %>%
  mutate(red = as.numeric(red > .5)) %>%
  mutate(y = sort(rep(1:10, 10)))
group2 <- tibble(x = rep(1:10, 10),
                 red = runif(100, 0, 1)) %>%
  mutate(red = as.numeric(red < .1)) %>%
  mutate(y = sort(rep(1:10, 10)))

ggplot(group1, aes(x, y)) +
  geom_tile(aes(fill = as.factor(red))) +
  scale_fill_manual(values = c("gray90", "black")) +
  theme_classic() +
  ggtitle("Group A") +
  theme(legend.position = "none",
        axis.title = element_blank(),
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.line = element_blank()) -> group1_plot
ggplot(group2, aes(x, y)) +
  geom_tile(aes(fill = as.factor(red))) +
  scale_fill_manual(values = c("gray90", "black")) +
  theme_classic() +
  ggtitle("Group B") +
  theme(legend.position = "none",
        axis.title = element_blank(),
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.line = element_blank()) -> group2_plot

ggarrange(group1_plot,
          group2_plot,
          nrow = 1) -> group_plots

ggsave(filename = "Figure 1.tiff",
       plot = group_plots,
       width = 4.5,
       height = 2.25,
       units = "in")

# > Figure 2: map of matched immigrants ####
# - NB: This section relies on county shapefiles from IPUMS NHGIS (http://doi.org/10.18128/D050.V16.0). See README for data access details.

scicp_1900 <- mlp_full[RANGE == 10]
scicp_1900 <- scicp_1900[, .(count = .N,
                             decline = mean(base_decline_magnitude, na.rm = TRUE)), 
                         by = SCICP_START] # counting matched immigrants by county (1900)
scicp_1910 <- mlp_full[RANGE == 1020]
scicp_1910 <- scicp_1910[, .(count = .N,
                             decline = mean(base_decline_magnitude, na.rm = TRUE)), 
                         by = SCICP_START] # counting matched immigrants by county (1910)

county_shapefile_1900 <- st_read("1900_COUNTY_SHAPEFILE_HERE",
                                 stringsAsFactors = FALSE,
                                 as_tibble = TRUE) %>%
  st_simplify(dTolerance = 1000) %>%
  mutate(SCICP_START = paste0(ICPSRST, "-", ICPSRCTY)) %>%
  distinct(SCICP_START, .keep_all = TRUE) %>%
  left_join(scicp_1900,
            by = "SCICP_START") %>%
  mutate(count = ifelse(is.na(count), 0, count)) # loading shapefile and merging with count
county_shapefile_1910 <- st_read("1910_COUNTY_SHAPEFILE_HERE",
                                 stringsAsFactors = FALSE,
                                 as_tibble = TRUE) %>%
  st_simplify(dTolerance = 1000) %>%
  mutate(SCICP_START = paste0(ICPSRST, "-", ICPSRCTY)) %>%
  distinct(SCICP_START, .keep_all = TRUE) %>%
  left_join(scicp_1910,
            by = "SCICP_START") %>%
  mutate(count = ifelse(is.na(count), 0, count)) # loading shapefile and merging with count

county_shapefile_1900 %>%
  filter(ICPSRST <= 73) %>%
  ggplot() +
  geom_sf(aes(fill = log1p(count)), color = "gray10", size = 0.1) +
  scale_fill_gradientn("Matched European immigrants",
                       na.value = "white",
                       colours = brewer.pal(5, "Greys"),
                       breaks = c(0, 2.3, 4.6, 6.9, 9.2),
                       labels = c("0", "10", "100", "1,000", "10,000")) +
  coord_sf(datum = NA, expand = FALSE) +
  ggtitle("1900-10") +
  theme_map() +
  guides(fill = guide_legend(nrow = 1)) +
  theme(legend.position = "bottom",
        legend.text.align = 0.5,
        panel.grid.major = element_line(colour = "transparent"),
        legend.title = element_text(face = "bold"),
        legend.background = element_rect(fill = alpha("white", 0))) -> plot1900
county_shapefile_1910 %>%
  filter(ICPSRST <= 73) %>%
  ggplot() +
  geom_sf(aes(fill = log1p(count)), color = "gray10", size = 0.1) +
  scale_fill_gradientn("Matched European immigrants",
                       na.value = "white",
                       colours = brewer.pal(5, "Greys"),
                       breaks = c(0, 2.3, 4.6, 6.9, 9.2),
                       labels = c("0", "10", "100", "1,000", "10,000")) +
  coord_sf(datum = NA, expand = FALSE) +
  ggtitle("1910-20") +
  theme_map() +
  guides(fill = guide_legend(nrow = 1)) +
  theme(legend.position = "bottom",
        legend.text.align = 0.5,
        panel.grid.major = element_line(colour = "transparent"),
        legend.title = element_text(face = "bold"),
        legend.background = element_rect(fill = alpha("white", 0))) -> plot1910

ggarrange(plot1900,
          plot1910,
          nrow = 1,
          common.legend = TRUE,
          legend = "bottom") -> match_maps

ggsave(filename = "Figure 2.tiff",
       plot = match_maps,
       width = 8,
       height = 4,
       dpi = 600,
       bg = "white",
       units = "in")

# > Figure 3: naturalization ~ group concentration * shock intensity ####

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                      BARTIK_MEAN_SQRT) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_full,
     subset = CITIZEN_START == 3,
     weights = mlp_full[CITIZEN_START == 3]$weight2_coal) -> total_reg01a

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT + 
                                                      BARTIK_MEAN_SQRT + 
                                                      GROUP_PCTCOUNTY_START_SQRT + 
                                                      COAL_RELIANCE_START_SQRT +
                                                      RURAL_PCTCOUNTY_START + 
                                                      BLACK_PCTCOUNTY_START_SQRT + 
                                                      YRIMMIG_START +
                                                      INCWAGE_MEDIAN_START_LN + 
                                                      MARRIED_START) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_full,
     subset = CITIZEN_START == 3,
     weights = mlp_full[CITIZEN_START == 3]$weight2_coal) -> total_reg01b

felm_int_plot(modelMin = total_reg01a,
              modelFull = total_reg01b,
              ymin = -.9, ymax = .42) -> main_plot

ggsave(filename = "Figure 3.tiff",
       plot = main_plot,
       width = 4,
       height = 4,
       dpi = 600,
       units = "in")

# > Figure 4: naturalization ~ group concentration * shock intensity (miners vs. non-miners) ####

# >> Immigrant miners ####

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                      BARTIK_MEAN_SQRT) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_coal_v2,
     weights = mlp_coal_v2$weight2_coal) -> baseline_coal_naturalized_min_reg

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT +
                                                      BARTIK_MEAN_SQRT + GROUP_PCTCOUNTY_START_SQRT + COAL_RELIANCE_START_SQRT +
                                                      RURAL_PCTCOUNTY_START + BLACK_PCTCOUNTY_START_SQRT + YRIMMIG_START +
                                                      INCWAGE_MEDIAN_START_LN + MARRIED_START) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_coal_v2,
     weights = mlp_coal_v2$weight2_coal) -> baseline_coal_naturalized_full_reg

felm_int_plot(modelMin = baseline_coal_naturalized_min_reg,
              modelFull = baseline_coal_naturalized_full_reg,
              ymin = -1.2, ymax = 0.7,
              title = "Immigrant miners") -> baseline_coal_naturalized_plots

# >> Coal-adjacent immigrants ####

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                      BARTIK_MEAN_SQRT) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_noncoal_v2,
     weights = mlp_noncoal_v2$weight2_noncoal) -> baseline_noncoal_naturalized_min_reg

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT +
                                                      BARTIK_MEAN_SQRT + GROUP_PCTCOUNTY_START_SQRT + COAL_RELIANCE_START_SQRT +
                                                      RURAL_PCTCOUNTY_START + BLACK_PCTCOUNTY_START_SQRT + YRIMMIG_START +
                                                      INCWAGE_MEDIAN_START_LN + MARRIED_START) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_noncoal_v2,
     weights = mlp_noncoal_v2$weight2_noncoal) -> baseline_noncoal_naturalized_full_reg

felm_int_plot(modelMin = baseline_noncoal_naturalized_min_reg,
              modelFull = baseline_noncoal_naturalized_full_reg,
              ymin = -1.2, ymax = 0.7,
              title = "Coal-adjacent immigrants") -> baseline_noncoal_naturalized_plots

ggarrange(baseline_coal_naturalized_plots,
          baseline_noncoal_naturalized_plots,
          ncol = 2) -> coal_and_noncoal_plots

ggsave(filename = "Figure 4.tiff",
       plot = coal_and_noncoal_plots,
       width = 8,
       height = 4,
       dpi = 600,
       units = "in")

# > Figure 5: naturalization ~ group concentration * shock intensity (electoral context) ####

# >> Electorally competitive ####

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                      BARTIK_MEAN_SQRT) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_full[!is.na(cong_dem_percent_START) & !is.na(cong_rep_percent_START)],
     subset = CITIZEN_START == 3 & GROUP_START != "" &
       (cong_dem_percent_START - cong_rep_percent_START > -12.6) & 
       !is.na(cong_dem_percent_START) & !is.na(cong_rep_percent_START),
     weights = mlp_full[CITIZEN_START == 3 & GROUP_START != "" &
                          (cong_dem_percent_START - cong_rep_percent_START > -12.6) & 
                          !is.na(cong_dem_percent_START) & !is.na(cong_rep_percent_START)]$weight2_coal) -> dem_reg01a

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT +
                                                      BARTIK_MEAN_SQRT + GROUP_PCTCOUNTY_START_SQRT + COAL_RELIANCE_START_SQRT +
                                                      RURAL_PCTCOUNTY_START + BLACK_PCTCOUNTY_START_SQRT + YRIMMIG_START +
                                                      INCWAGE_MEDIAN_START_LN + MARRIED_START) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_full[!is.na(cong_dem_percent_START) & !is.na(cong_rep_percent_START)],
     subset = CITIZEN_START == 3 & GROUP_START != "" &
       (cong_dem_percent_START - cong_rep_percent_START > -12.6) & 
       !is.na(cong_dem_percent_START) & !is.na(cong_rep_percent_START),
     weights = mlp_full[CITIZEN_START == 3 & GROUP_START != "" &
                          (cong_dem_percent_START - cong_rep_percent_START > -12.6) & 
                          !is.na(cong_dem_percent_START) & !is.na(cong_rep_percent_START)]$weight2_coal) -> dem_reg01b

felm_int_plot(modelMin = dem_reg01a,
              modelFull = dem_reg01b,
              ymin = -1.05, ymax = 0.65,
              title = "More competitive") -> dem_plot

# >> Republican strongholds ####

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+BARTIK_MEAN_SQRT) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_full[!is.na(cong_dem_percent_START) & !is.na(cong_rep_percent_START)],
     subset = CITIZEN_START == 3 & GROUP_START != "" &
       (cong_dem_percent_START - cong_rep_percent_START <= -12.6) & 
       !is.na(cong_dem_percent_START) & !is.na(cong_rep_percent_START),
     weights = mlp_full[CITIZEN_START == 3 & GROUP_START != "" &
                          (cong_dem_percent_START - cong_rep_percent_START <= -12.6) & 
                          !is.na(cong_dem_percent_START) & !is.na(cong_rep_percent_START)]$weight2_coal) -> rep_reg01a

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT + 
                                                      BARTIK_MEAN_SQRT + 
                                                      GROUP_PCTCOUNTY_START_SQRT + 
                                                      COAL_RELIANCE_START_SQRT +
                                                      RURAL_PCTCOUNTY_START + 
                                                      BLACK_PCTCOUNTY_START_SQRT + 
                                                      YRIMMIG_START +
                                                      INCWAGE_MEDIAN_START_LN + 
                                                      MARRIED_START) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_full[!is.na(cong_dem_percent_START) & !is.na(cong_rep_percent_START)],
     subset = CITIZEN_START == 3 & GROUP_START != "" &
       (cong_dem_percent_START - cong_rep_percent_START <= -12.6) & 
       !is.na(cong_dem_percent_START) & !is.na(cong_rep_percent_START),
     weights = mlp_full[CITIZEN_START == 3 & GROUP_START != "" &
                          (cong_dem_percent_START - cong_rep_percent_START <= -12.6) & 
                          !is.na(cong_dem_percent_START) & !is.na(cong_rep_percent_START)]$weight2_coal) -> rep_reg01b

felm_int_plot(modelMin = rep_reg01a,
              modelFull = rep_reg01b,
              ymin = -1.05, ymax = 0.65,
              title = "Republican strongholds") -> rep_plot

ggarrange(dem_plot,
          rep_plot,
          ncol = 2) -> dem_and_rep_plots

ggsave(filename = "Figure 5.tiff",
       plot = dem_and_rep_plots,
       width = 8,
       height = 4,
       dpi = 600,
       units = "in")

# > Figure 6: naturalization ~ group concentration * shock intensity (geography) ####

# >> Central Appalachia and the West ####
felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                      BARTIK_MEAN_SQRT) |
       STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_full[STATEICP_START %in% c(40, 51, 54, 56:73) &
                       CITIZEN_START == 3 &
                       !is.na(GROUP_COAL_PCTGROUP_START_SQRT)],
     weights = mlp_full[STATEICP_START %in% c(40, 51, 54, 56:73) &
                          CITIZEN_START == 3 &
                          !is.na(GROUP_COAL_PCTGROUP_START_SQRT)]$weight2_coal) -> west_min

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                      BARTIK_MEAN_SQRT+
                                                      YRIMMIG_START+
                                                      INCWAGE_MEDIAN_START_LN+
                                                      MARRIED_START+
                                                      GROUP_PCTCOUNTY_START_SQRT+
                                                      COAL_RELIANCE_START_SQRT+
                                                      RURAL_PCTCOUNTY_START+
                                                      BLACK_PCTCOUNTY_START_SQRT) |
       STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_full[STATEICP_START %in% c(40, 51, 54, 56:73) &
                       CITIZEN_START == 3 &
                       !is.na(GROUP_COAL_PCTGROUP_START_SQRT)],
     weights = mlp_full[STATEICP_START %in% c(40, 51, 54, 56:73) &
                          CITIZEN_START == 3 &
                          !is.na(GROUP_COAL_PCTGROUP_START_SQRT)]$weight2_coal) -> west_full

felm_int_plot(modelMin = west_min,
              modelFull = west_full,
              ymin = -1, ymax = 0.5,
              title = "Central Appalachia and the West") -> west_plot

# >> Elsewhere ####
felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                      BARTIK_MEAN_SQRT) |
       STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_full[(!STATEICP_START %in% c(40, 51, 54, 56:73)) &
                       CITIZEN_START == 3 &
                       !is.na(GROUP_COAL_PCTGROUP_START_SQRT)],
     weights = mlp_full[(!STATEICP_START %in% c(40, 51, 54, 56:73)) &
                          CITIZEN_START == 3 &
                          !is.na(GROUP_COAL_PCTGROUP_START_SQRT)]$weight2_coal) -> east_min

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                      BARTIK_MEAN_SQRT+
                                                      YRIMMIG_START+
                                                      INCWAGE_MEDIAN_START_LN+
                                                      MARRIED_START+
                                                      GROUP_PCTCOUNTY_START_SQRT+
                                                      COAL_RELIANCE_START_SQRT+
                                                      RURAL_PCTCOUNTY_START+
                                                      BLACK_PCTCOUNTY_START_SQRT) |
       STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_full[(!STATEICP_START %in% c(40, 51, 54, 56:73)) &
                       CITIZEN_START == 3 &
                       !is.na(GROUP_COAL_PCTGROUP_START_SQRT)],
     weights = mlp_full[(!STATEICP_START %in% c(40, 51, 54, 56:73)) &
                          CITIZEN_START == 3 &
                          !is.na(GROUP_COAL_PCTGROUP_START_SQRT)]$weight2_coal) -> east_full

felm_int_plot(modelMin = east_min,
              modelFull = east_full,
              ymin = -1, ymax = 0.5,
              title = "Elsewhere") -> east_plot

ggarrange(west_plot,
          east_plot,
          ncol = 2) -> west_and_east_plots

ggsave(filename = "Figure 6.tiff",
       plot = west_and_east_plots,
       width = 8,
       height = 4,
       dpi = 600,
       units = "in")

# > Figure 7: naturalization ~ group concentration * shock intensity (enclave position) ####

# >> More coethnic coworkers ####
felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                      BARTIK_MEAN_SQRT) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_noncoal_v2,
     weights = mlp_noncoal_v2[IND_PCTIND > median(mlp_noncoal_v2$IND_PCTIND)]$weight2_coal,
     subset = IND_PCTIND > median(mlp_noncoal_v2$IND_PCTIND)) -> center_min

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT +
                                                      BARTIK_MEAN_SQRT + GROUP_PCTCOUNTY_START_SQRT + COAL_RELIANCE_START_SQRT +
                                                      RURAL_PCTCOUNTY_START + BLACK_PCTCOUNTY_START_SQRT + YRIMMIG_START +
                                                      INCWAGE_MEDIAN_START_LN + MARRIED_START) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_noncoal_v2,
     weights = mlp_noncoal_v2[IND_PCTIND > median(mlp_noncoal_v2$IND_PCTIND)]$weight2_coal,
     subset = IND_PCTIND > median(mlp_noncoal_v2$IND_PCTIND)) -> center_full

felm_int_plot(modelMin = center_min,
              modelFull = center_full,
              ymin = -2.1, ymax = 1.15,
              title = "More coethnic coworkers") -> center_plot

# >> Fewer coethnic coworkers ####
felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                      BARTIK_MEAN_SQRT) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_noncoal_v2,
     weights = mlp_noncoal_v2[IND_PCTIND <= median(mlp_noncoal_v2$IND_PCTIND)]$weight2_coal,
     subset = IND_PCTIND <= median(mlp_noncoal_v2$IND_PCTIND)) -> peripheral_min

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT +
                                                      BARTIK_MEAN_SQRT + GROUP_PCTCOUNTY_START_SQRT + COAL_RELIANCE_START_SQRT +
                                                      RURAL_PCTCOUNTY_START + BLACK_PCTCOUNTY_START_SQRT + YRIMMIG_START +
                                                      INCWAGE_MEDIAN_START_LN + MARRIED_START) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_noncoal_v2,
     weights = mlp_noncoal_v2[IND_PCTIND <= median(mlp_noncoal_v2$IND_PCTIND)]$weight2_coal,
     subset = IND_PCTIND <= median(mlp_noncoal_v2$IND_PCTIND)) -> peripheral_full

felm_int_plot(modelMin = peripheral_min,
              modelFull = peripheral_full,
              ymin = -2.1, ymax = 1.15,
              title = "Fewer coethnic coworkers") -> peripheral_plot

ggarrange(center_plot,
          peripheral_plot,
          ncol = 2) -> center_and_peripheral_plots

ggsave(filename = "Figure 7.tiff",
       plot = center_and_peripheral_plots,
       width = 8,
       height = 4,
       dpi = 600,
       units = "in")

# ********************************************** #
################################################ #
#### *** FIGURES AND TABLES IN APPENDICES *** ####
################################################ #
# ********************************************** #

# > Figure A1: group concentration and future coal production changes ####

group_concentration_data <- fread("group_concentration.csv") # group concentration in coal data at county-year-group level

felm(sqrt(base_decline_magnitude) ~ coal_pct:as.factor(GROUP) | 0 | 0 | 0,
     data = group_concentration_data) %>%
  tidy() %>%
  filter(term != "(Intercept)") %>%
  filter(!is.nan(estimate)) %>%
  mutate(term = substr(term, 26, 36))  %>%
  filter(term != "luxembourgi") %>%
  ggplot(aes(x = term)) +
  geom_point(aes(y = estimate)) +
  geom_linerange(aes(ymin = estimate-1.96*std.error, ymax = estimate+1.96*std.error)) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  ylab("Difference in Group Concentration") +
  xlab("Group")

# > Figure A2: groups per county ####

mlp_full %>%
  filter(GROUP_START != "nativeblack" & GROUP_START != "nativewhite") %>% # limiting dataset to European immigrants
  group_by(SCICP_START, RANGE) %>%
  summarize(n = n(), # number of immigrants by county-year
            distinct_groups = n_distinct(GROUP_START)) %>% # number of distinct immigrant groups by county-year
  ggplot() +
  geom_density(aes(x = distinct_groups)) +
  geom_vline(aes(xintercept = mean(distinct_groups)), lty = 2) +
  theme_classic() +
  xlab("Distinct Groups Within County (by Census)") +
  ylab("Density")

mlp_full %>%
  filter(GROUP_START != "nativeblack" & GROUP_START != "nativewhite") %>% # limiting dataset to European immigrants
  group_by(SCICP_START, RANGE) %>%
  summarize(n = n(), # number of immigrants by county-year
            distinct_groups = n_distinct(GROUP_START)) %$% # number of distinct immigrant groups by county-year
  mean(distinct_groups > 1) # 99.4% figure in caption

# > Figure B1: yearly coal production by county ####

coal_agg <- read_rds("coal_agg.rds")

read_rds("coal_agg.rds") %>%
  filter(multiple_counties == 0) %>%
  filter(decadeProduction_chng_neg_weighted < quantile(coal_agg$base_decline_magnitude, na.rm = TRUE)[[2]]) %>%
  group_by(SCICP, decade_agg_year) %>%
  mutate(Production_rel = Production/dplyr::first(Production) * 100) %>%
  ungroup() %>%
  filter(decade_agg_year == 1910) %>%
  mutate(Year_rel = Year - decade_agg_year) %>%
  ggplot(aes(x = as.integer(Year), y = log(Production_rel), group = SCICP)) +
  geom_line() +
  geom_hline(yintercept = log(100), color = "red", lty = 2) +
  theme_classic() +
  theme(axis.text.x = element_blank(),
        axis.ticks = element_blank()) +
  ylab("") +
  xlab("") +
  scale_y_continuous(breaks=c(log(1), log(10), log(100), log(1000), log(10000)),
                     labels = c(1, 10, 100, 1000, 10000)) -> q1_10
read_rds("coal_agg.rds") %>%
  filter(multiple_counties == 0) %>%
  filter(decadeProduction_chng_neg_weighted < quantile(coal_agg$base_decline_magnitude, na.rm = TRUE)[[2]]) %>%
  group_by(SCICP, decade_agg_year) %>%
  mutate(Production_rel = Production/dplyr::first(Production) * 100) %>%
  ungroup() %>%
  filter(decade_agg_year == 1920) %>%
  mutate(Year_rel = Year - decade_agg_year) %>%
  ggplot(aes(x = as.integer(Year), y = log(Production_rel), group = SCICP)) +
  geom_line() +
  geom_hline(yintercept = log(100), color = "red", lty = 2) +
  theme_classic() +
  theme(axis.text.x = element_blank(),
        axis.ticks = element_blank()) +
  ylab("") +
  xlab("") +
  scale_y_continuous(breaks=c(log(1), log(10), log(100), log(1000), log(10000)),
                     labels = c(1, 10, 100, 1000, 10000)) -> q1_20

read_rds("coal_agg.rds") %>%
  filter(multiple_counties == 0) %>%
  filter(decadeProduction_chng_neg_weighted >= quantile(coal_agg$base_decline_magnitude, na.rm = TRUE)[[2]] &
           decadeProduction_chng_neg_weighted < quantile(coal_agg$base_decline_magnitude, na.rm = TRUE)[[3]]) %>%
  group_by(SCICP, decade_agg_year) %>%
  mutate(Production_rel = Production/dplyr::first(Production) * 100) %>%
  ungroup() %>%
  filter(decade_agg_year == 1910) %>%
  mutate(Year_rel = Year - decade_agg_year) %>%
  ggplot(aes(x = as.integer(Year), y = log(Production_rel), group = SCICP)) +
  geom_line() +
  geom_hline(yintercept = log(100), color = "red", lty = 2) +
  theme_classic() +
  theme(axis.text.x = element_blank(),
        axis.ticks = element_blank()) +
  ylab("") +
  xlab("") +
  scale_y_continuous(breaks=c(log(1), log(10), log(100), log(1000), log(10000)),
                     labels = c(1, 10, 100, 1000, 10000)) -> q2_10
read_rds("coal_agg.rds") %>%
  filter(multiple_counties == 0) %>%
  filter(decadeProduction_chng_neg_weighted >= quantile(coal_agg$base_decline_magnitude, na.rm = TRUE)[[2]] &
           decadeProduction_chng_neg_weighted < quantile(coal_agg$base_decline_magnitude, na.rm = TRUE)[[3]]) %>%
  group_by(SCICP, decade_agg_year) %>%
  mutate(Production_rel = Production/dplyr::first(Production) * 100) %>%
  ungroup() %>%
  filter(decade_agg_year == 1920) %>%
  mutate(Year_rel = Year - decade_agg_year) %>%
  ggplot(aes(x = as.integer(Year), y = log(Production_rel), group = SCICP)) +
  geom_line() +
  geom_hline(yintercept = log(100), color = "red", lty = 2) +
  theme_classic() +
  theme(axis.text.x = element_blank(),
        axis.ticks = element_blank()) +
  ylab("") +
  xlab("") +
  scale_y_continuous(breaks=c(log(1), log(10), log(100), log(1000), log(10000)),
                     labels = c(1, 10, 100, 1000, 10000)) -> q2_20

read_rds("coal_agg.rds") %>%
  filter(multiple_counties == 0) %>%
  filter(decadeProduction_chng_neg_weighted >= quantile(coal_agg$base_decline_magnitude, na.rm = TRUE)[[3]] &
           decadeProduction_chng_neg_weighted < quantile(coal_agg$base_decline_magnitude, na.rm = TRUE)[[4]]) %>%
  group_by(SCICP, decade_agg_year) %>%
  mutate(Production_rel = Production/dplyr::first(Production) * 100) %>%
  ungroup() %>%
  filter(decade_agg_year == 1910) %>%
  mutate(Year_rel = Year - decade_agg_year) %>%
  ggplot(aes(x = as.integer(Year), y = log(Production_rel), group = SCICP)) +
  geom_line() +
  geom_hline(yintercept = log(100), color = "red", lty = 2) +
  theme_classic() +
  theme(axis.text.x = element_blank(),
        axis.ticks = element_blank()) +
  ylab("") +
  xlab("") +
  scale_y_continuous(breaks=c(log(1), log(10), log(100), log(1000), log(10000)),
                     labels = c(1, 10, 100, 1000, 10000)) -> q3_10
read_rds("coal_agg.rds") %>%
  filter(multiple_counties == 0) %>%
  filter(decadeProduction_chng_neg_weighted >= quantile(coal_agg$base_decline_magnitude, na.rm = TRUE)[[3]] &
           decadeProduction_chng_neg_weighted < quantile(coal_agg$base_decline_magnitude, na.rm = TRUE)[[4]]) %>%
  group_by(SCICP, decade_agg_year) %>%
  mutate(Production_rel = Production/dplyr::first(Production) * 100) %>%
  ungroup() %>%
  filter(decade_agg_year == 1920) %>%
  mutate(Year_rel = Year - decade_agg_year) %>%
  ggplot(aes(x = as.integer(Year), y = log(Production_rel), group = SCICP)) +
  geom_line() +
  geom_hline(yintercept = log(100), color = "red", lty = 2) +
  theme_classic() +
  theme(axis.text.x = element_blank(),
        axis.ticks = element_blank()) +
  ylab("") +
  xlab("") +
  scale_y_continuous(breaks=c(log(1), log(10), log(100), log(1000), log(10000)),
                     labels = c(1, 10, 100, 1000, 10000)) -> q3_20

read_rds("coal_agg.rds") %>%
  filter(multiple_counties == 0) %>%
  filter(decadeProduction_chng_neg_weighted >= quantile(coal_agg$base_decline_magnitude, na.rm = TRUE)[[4]] &
           decadeProduction_chng_neg_weighted < quantile(coal_agg$base_decline_magnitude, na.rm = TRUE)[[5]]) %>%
  group_by(SCICP, decade_agg_year) %>%
  mutate(Production_rel = Production/dplyr::first(Production) * 100) %>%
  ungroup() %>%
  filter(decade_agg_year == 1910) %>%
  mutate(Year_rel = Year - decade_agg_year) %>%
  ggplot(aes(x = as.integer(Year), y = log(Production_rel), group = SCICP)) +
  geom_line() +
  geom_hline(yintercept = log(100), color = "red", lty = 2) +
  theme_classic() +
  theme(axis.text.x = element_blank(),
        axis.ticks = element_blank()) +
  ylab("") +
  xlab("") +
  scale_y_continuous(breaks=c(log(1), log(10), log(100), log(1000), log(10000)),
                     labels = c(1, 10, 100, 1000, 10000)) -> q4_10
read_rds("coal_agg.rds") %>%
  filter(multiple_counties == 0) %>%
  filter(decadeProduction_chng_neg_weighted >= quantile(coal_agg$base_decline_magnitude, na.rm = TRUE)[[4]] &
           decadeProduction_chng_neg_weighted < quantile(coal_agg$base_decline_magnitude, na.rm = TRUE)[[5]]) %>%
  group_by(SCICP, decade_agg_year) %>%
  mutate(Production_rel = Production/dplyr::first(Production) * 100) %>%
  ungroup() %>%
  filter(decade_agg_year == 1920) %>%
  mutate(Year_rel = Year - decade_agg_year) %>%
  ggplot(aes(x = as.integer(Year), y = log(Production_rel), group = SCICP)) +
  geom_line() +
  geom_hline(yintercept = log(100), color = "red", lty = 2) +
  theme_classic() +
  theme(axis.text.x = element_blank(),
        axis.ticks = element_blank()) +
  ylab("") +
  xlab("") +
  scale_y_continuous(breaks=c(log(1), log(10), log(100), log(1000), log(10000)),
                     labels = c(1, 10, 100, 1000, 10000)) -> q4_20

ggarrange(q1_10, q2_10, q3_10, q4_10,
          q1_20, q2_20, q3_20, q4_20,
          nrow = 2,
          ncol = 4)

# > Figure B2: map of shock intensity ####

county_shapefile_1900 %>%
  filter(!ICPSRST %in% c(81, 82)) %>%
  ggplot() +
  geom_sf(aes(fill = decline), color = "gray95", size = 0) +
  scale_fill_gradientn("Coal shock intensity",
                       na.value = "#fff7ec",
                       colours = brewer.pal(5, "OrRd")) +
  coord_sf(datum = NA, expand = F) +
  ggtitle("1900-10") +
  theme_map() +
  guides(fill = guide_legend(nrow = 1)) +
  theme(legend.position = "bottom",
        legend.text.align = 0.5,
        panel.grid.major = element_line(colour = "transparent"),
        legend.title = element_text(face = "bold"),
        legend.background = element_rect(fill = alpha("white", 0))) -> plot1900decline

county_shapefile_1910 %>%
  filter(!ICPSRST %in% c(81, 82) & !is.na(ICPSRST)) %>%
  ggplot() +
  geom_sf(aes(fill = decline), color = "gray95", size = 0) +
  scale_fill_gradientn("Coal shock intensity",
                       na.value = "#fff7ec",
                       colours = brewer.pal(5, "OrRd")) +
  coord_sf(datum = NA, expand = F) +
  ggtitle("1910-20") +
  theme_map() +
  guides(fill = guide_legend(nrow = 1)) +
  theme(legend.position = "bottom",
        legend.text.align = 0.5,
        panel.grid.major = element_line(colour = "transparent"),
        legend.title = element_text(face = "bold"),
        legend.background = element_rect(fill = alpha("white", 0))) -> plot1910decline

ggarrange(plot1900decline,
          plot1910decline,
          ncol = 2,
          common.legend = TRUE,
          legend = "bottom")

# > Figure B3: histogram of year-over-year production changes ####

read_rds("coal_agg.rds") %>%
  filter(multiple_counties == 0) %>% 
  filter(Year %in% 1900:1920) %>%
  mutate(Production_chng = Production_chng*100) %>%
  ggplot() +
  geom_histogram(aes(x = Production_chng)) +
  xlim(-300, 300) +
  theme_classic() +
  xlab("Year-over-Year Production Change (%)") +
  ylab("Count")

# > Figure D1: match rates among European immigrants by county ####

match_rates_by_county <- fread("match_rates_by_county.csv") # match rate by county-decade

match_rates_by_county %>%
  filter(RANGE == 10) %>%
  arrange(MATCH_RATE) %>%
  mutate(id = 1:nrow(.)) %>%
  ggplot(aes(x = id, y = MATCH_RATE)) +
  geom_point() +
  ggtitle("1900-10") +
  ylab("Match Rate") +
  xlab("County") +
  theme_minimal() -> match_plot0010

match_rates_by_county %>%
  filter(RANGE == 1020) %>%
  arrange(MATCH_RATE) %>%
  mutate(id = 1:nrow(.)) %>%
  ggplot(aes(x = id, y = MATCH_RATE)) +
  geom_point() +
  ggtitle("1910-20") +
  ylab("Match Rate") +
  xlab("County") +
  theme_minimal() -> match_plot1020

ggarrange(match_plot0010,
          match_plot1020,
          ncol = 2)

# > Table D1: summary statistics for full dataset ####

mlp_summary <- mlp_full[GROUP_START != "" & GROUP_START != "nativeblack" & GROUP_START != "nativewhite"] # limiting to European immigrants
mlp_summary <- mlp_summary[, BARTIK_MEAN := abs(BARTIK_MEAN)] # making Bartik-estimated general economic decline values positive for comparability with coal decline variable
mlp_summary <- mlp_summary[AGE_START >= 21] # limiting to immigrants of at least 21 years of age
mlp_summary <- mlp_summary[, c("GROUP_COAL_PCTGROUP_START",
                               "base_decline_magnitude",
                               "BARTIK_MEAN",
                               "NATURALIZED_END",
                               "INTERMARRIAGE_NWHITE_END",
                               "INTERMARRIAGE_IMMIG_END",
                               "SPEAKENG_END",
                               "YRIMMIG_START",
                               "INCWAGE_MEDIAN_START",
                               "MALE",
                               "MARRIED_START",
                               "COAL_FAM_START",
                               "GROUP_PCTCOUNTY_START",
                               "COAL_RELIANCE_START",
                               "RURAL_PCTCOUNTY_START",
                               "BLACK_PCTCOUNTY_START"), with = FALSE]
stargazer(mlp_summary)

# > Table D2: mean characteristics of matched/unmatched European immigrants ####

# Matched immigrants; unmatched immigrants
coalcounties1900 <- read_rds("coalproducingcounties1900.rds") # list of coal producing counties in 1900
coalcounties1910 <- read_rds("coalproducingcounties1910.rds") # list of coal producing counties in 1910

c1900 <- fread("1900_FULL_COUNT_FILE_HERE",
               select = c(HISTID = "character",
                          STATEICP = "character",
                          COUNTYICP = "character",
                          SEX = "integer",
                          AGE = "integer",
                          OCCSCORE = "integer",
                          LIT = "integer",
                          URBAN = "integer",
                          CITIZEN = "integer",
                          BPL = "integer"))
c1900 <- c1900[HISTID != ""]
c1900 <- unique(c1900, by = "HISTID")
c1900 <- c1900[, SCICP := paste0(STATEICP, "-", COUNTYICP)]
c1900 <- c1900[SCICP %in% coalcounties1900]

mlp0010 <- fread("1900-10_MLP_FILE_HERE")
mlp0010 <- mlp0010[, matched := 1]

c1900 <- merge(x = c1900,
               y = mlp0010,
               by.x = "HISTID",
               by.y = "histid_1900",
               all.x = T)

c1900 <- c1900[, matched := if_else(is.na(matched), 0, 1)]

c1900 <- c1900[BPL %in% 400:499]

c1910 <- fread("1910_FULL_COUNT_FILE_HERE",
               select = c(HISTID = "character",
                          STATEICP = "character",
                          COUNTYICP = "character",
                          SEX = "integer",
                          AGE = "integer",
                          OCCSCORE = "integer",
                          LIT = "integer",
                          URBAN = "integer",
                          CITIZEN = "integer",
                          BPL = "integer"))
c1910 <- c1910[HISTID != ""]
c1910 <- unique(c1910, by = "HISTID")
c1910 <- c1910[, SCICP := paste0(STATEICP, "-", COUNTYICP)]
c1910 <- c1910[SCICP %in% coalcounties1910]

mlp1020 <- fread("1910-20_MLP_FILE_HERE")
mlp1020 <- mlp1020[, matched := 1]

c1910 <- merge(x = c1910,
               y = mlp1020,
               by.x = "HISTID",
               by.y = "histid_1910",
               all.x = T)

c1910 <- c1910[, matched := if_else(is.na(matched), 0, 1)]

c1910 <- c1910[BPL %in% 400:499]

balance <- rbindlist(l = list(c1900[, c("STATEICP", "SCICP", "SEX", "AGE", "OCCSCORE", "LIT", "URBAN", "CITIZEN", "BPL", "matched", "round"), 
                                    with = FALSE], 
                              c1910[, c("STATEICP", "SCICP", "SEX", "AGE", "OCCSCORE", "LIT", "URBAN", "CITIZEN", "BPL", "matched", "round"), 
                                    with = FALSE]),
                     idcol = TRUE)
balance <- balance[AGE >= 21]

mean(balance$matched); 1-mean(balance$matched)
mean(balance[matched == 1]$AGE); mean(balance[matched == 0]$AGE)
mean(balance[matched == 1]$SEX == 2); mean(balance[matched == 0]$SEX == 2)
mean(balance[matched == 1]$LIT == 4); mean(balance[matched == 0]$LIT == 4)
mean(balance[matched == 1]$URBAN == 1); mean(balance[matched == 0]$URBAN == 1)
mean(balance[matched == 1]$OCCSCORE); mean(balance[matched == 0]$OCCSCORE)
mean(balance[matched == 1]$CITIZEN %in% c(2, 4)); mean(balance[matched == 0]$CITIZEN %in% c(2, 4))

rm(c1900); rm(c1910)

# Immigrants elsewhere (not in coal-producing areas)
c1900 <- fread("1900_FULL_COUNT_FILE_HERE",
               select = c(STATEICP = "character",
                          COUNTYICP = "character",
                          SEX = "integer",
                          AGE = "integer",
                          OCCSCORE = "integer",
                          LIT = "integer",
                          URBAN = "integer",
                          CITIZEN = "integer",
                          BPL = "integer"))
c1900 <- c1900[BPL %in% 400:499 & AGE >= 21]
c1900 <- c1900[, SCICP := paste0(STATEICP, "-", COUNTYICP)]
c1900 <- c1900[! SCICP %in% coalcounties1900]

c1910 <- fread("1910_FULL_COUNT_FILE_HERE",
               select = c(STATEICP = "character",
                          COUNTYICP = "character",
                          SEX = "integer",
                          AGE = "integer",
                          OCCSCORE = "integer",
                          LIT = "integer",
                          URBAN = "integer",
                          CITIZEN = "integer",
                          BPL = "integer"))
c1910 <- c1910[BPL %in% 400:499 & AGE >= 21]
c1910 <- c1910[, SCICP := paste0(STATEICP, "-", COUNTYICP)]
c1910 <- c1910[! SCICP %in% coalcounties1910]

balance2 <- rbindlist(l = list(c1900,
                               c1910),
                      idcol = TRUE)

mean(balance2$AGE)
mean(balance2$SEX == 2)
mean(balance2$LIT == 4)
mean(balance2$URBAN == 1)
mean(balance2$OCCSCORE)
mean(balance2$CITIZEN %in% c(2, 4))

# > Figure D2: group match rate ~ group concentration * shock intensity ####

mlp_full_abridged <- mlp_full[, c("SCICP_START", "GROUP_START", "GROUP_SCICP",
                                  "STATICP_RANGE", "RANGE", "base_decline_magnitude_sqrt",
                                  "GROUP_COAL_PCTGROUP_START_SQRT", "GROUP_COUNTY_START"),
                              with = FALSE]

mlp_full_abridged <- mlp_full_abridged[, .(base_decline_magnitude_sqrt = mean(base_decline_magnitude_sqrt),
                                           GROUP_COAL_PCTGROUP_START_SQRT = mean(GROUP_COAL_PCTGROUP_START_SQRT),
                                           GROUP_COUNTY_START = mean(GROUP_COUNTY_START),
                                           GROUP_COUNTY_MATCHED = .N),
                                       by = list(SCICP_START, RANGE, GROUP_START, GROUP_SCICP, STATEICP_RANGE)]
mlp_full_abridged <- mlp_full_abridged[, GROUP_MATCH_RATE := GROUP_COUNTY_MATCHED/GROUP_COUNTY_START]
mlp_full_abridged <- mlp_full_abridged[! GROUP_START %in% c("nativewhite", "nativeblack", "")]

interflex(data = mlp_full_abridged,
          Y = "GROUP_MATCH_RATE",
          X = "base_decline_magnitude_sqrt",
          D = "GROUP_COAL_PCTGROUP_START_SQRT",
          FE = c("STATEICP_RANGE", "SCICP_START"),
          cl = "GROUP_SCICP",
          estimator = "binning",
          na.rm = TRUE,
          theme.bw = TRUE,
          xlab = "Shock intensity",
          ylab = "Marginal effect of group concentration on match rate",
          nbins = 5)

# > Table E1 (left panel): table for Figure 3 in manuscript ####

# -- refer to code above for Figure 3

# > Table E1 (right panel): interacting beginning-of-period covariates with year fixed effects ####

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT) + 
       BARTIK_MEAN_SQRT + 
       GROUP_PCTCOUNTY_START_SQRT:as.factor(RANGE) + 
       COAL_RELIANCE_START_SQRT:as.factor(RANGE) +
       RURAL_PCTCOUNTY_START:as.factor(RANGE) + 
       BLACK_PCTCOUNTY_START_SQRT:as.factor(RANGE) + 
       YRIMMIG_START:as.factor(RANGE) +
       INCWAGE_MEDIAN_START_LN:as.factor(RANGE) + 
       MARRIED_START:as.factor(RANGE) | STATEICP_START + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_full,
     subset = CITIZEN_START == 3,
     weights = mlp_full[CITIZEN_START == 3]$weight2_coal) %>%
  tidy()

# > Table E2: naturalization ~ shock intensity (split sample by group concentration) ####

# >> High exposure ####

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt + 
       BARTIK_MEAN_SQRT + YRIMMIG_START + INCWAGE_MEDIAN_START_LN +
       MARRIED_START + GROUP_PCTCOUNTY_START_SQRT + COAL_RELIANCE_START_SQRT +
       RURAL_PCTCOUNTY_START + BLACK_PCTCOUNTY_START_SQRT | STATEICP_RANGE | 0 | SCICP_START,
     data = mlp_full[CITIZEN_START == 3 & !is.na(GROUP_COAL_PCTGROUP_START_SQRT) & GROUP_COAL_PCTGROUP_START >= median(mlp_full[CITIZEN_START == 3 & !is.na(GROUP_COAL_PCTGROUP_START_SQRT)]$GROUP_COAL_PCTGROUP_START)],
     weights = mlp_full[CITIZEN_START == 3 & !is.na(GROUP_COAL_PCTGROUP_START_SQRT) & GROUP_COAL_PCTGROUP_START >= median(mlp_full[CITIZEN_START == 3 & !is.na(GROUP_COAL_PCTGROUP_START_SQRT)]$GROUP_COAL_PCTGROUP_START)]$weight2_coal) %>%
  tidy()

# >> Low exposure ####

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt + 
       BARTIK_MEAN_SQRT + YRIMMIG_START + INCWAGE_MEDIAN_START_LN +
       MARRIED_START + GROUP_PCTCOUNTY_START_SQRT + COAL_RELIANCE_START_SQRT +
       RURAL_PCTCOUNTY_START + BLACK_PCTCOUNTY_START_SQRT | STATEICP_RANGE | 0 | SCICP_START,
     data = mlp_full[CITIZEN_START == 3 & !is.na(GROUP_COAL_PCTGROUP_START_SQRT) & GROUP_COAL_PCTGROUP_START < median(mlp_full[CITIZEN_START == 3 & !is.na(GROUP_COAL_PCTGROUP_START_SQRT)]$GROUP_COAL_PCTGROUP_START)],
     weights = mlp_full[CITIZEN_START == 3 & !is.na(GROUP_COAL_PCTGROUP_START_SQRT) & GROUP_COAL_PCTGROUP_START < median(mlp_full[CITIZEN_START == 3 & !is.na(GROUP_COAL_PCTGROUP_START_SQRT)]$GROUP_COAL_PCTGROUP_START)]$weight2_coal) %>%
  tidy()

# > Table E3 (left panel): table for Figure 4 in manuscript ####

# -- refer to code above for Figure 4

# > Table E3 (right panel): table for Figure 5 in manuscript ####

# -- refer to code above for Figure 5

# > Table E4: table for Figure 6 in manuscript ####

# -- refer to code above for Figure 6

# > Table E5: table for Figure 7 in manuscript ####

# -- refer to code above for Figure 7

# > Figure F1: social assimilation interaction plots ####

# >> Marriage to native-born white citizen ####

felm(INTERMARRIAGE_NWHITE_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                               BARTIK_MEAN_SQRT) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_full,
     subset = CITIZEN_START == 3 & MARRIED_START == 0,
     weights = mlp_full[CITIZEN_START == 3 & MARRIED_START == 0]$weight2_coal) -> marriage_nwhite_min

felm(INTERMARRIAGE_NWHITE_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                               BARTIK_MEAN_SQRT+
                                                               YRIMMIG_START+
                                                               MALE+
                                                               GROUP_PCTCOUNTY_START_SQRT+
                                                               COAL_RELIANCE_START_SQRT+
                                                               RURAL_COUNTY_START+
                                                               BLACK_PCTCOUNTY_START_SQRT) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_full,
     subset = CITIZEN_START == 3 & MARRIED_START == 0,
     weights = mlp_full[CITIZEN_START == 3 & MARRIED_START == 0]$weight2_coal) -> marriage_nwhite_full

felm_int_plot(modelMin = marriage_nwhite_min,
              modelFull = marriage_nwhite_full,
              ymin = -.19, ymax = .12,
              title = "Marriage to native-born white citizen") -> marriage_nwhite_plot

# >> Marriage to non-coethnic immigrant ####

felm(INTERMARRIAGE_IMMIG_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                              BARTIK_MEAN_SQRT) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_full,
     subset = CITIZEN_START == 3 & MARRIED_START == 0,
     weights = mlp_full[CITIZEN_START == 3 & MARRIED_START == 0]$weight2_coal) -> marriage_immig_min

felm(INTERMARRIAGE_IMMIG_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                              BARTIK_MEAN_SQRT+
                                                              YRIMMIG_START+
                                                              MALE+
                                                              GROUP_PCTCOUNTY_START_SQRT+
                                                              COAL_RELIANCE_START_SQRT+
                                                              RURAL_COUNTY_START+
                                                              BLACK_PCTCOUNTY_START_SQRT) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_full,
     subset = CITIZEN_START == 3 & MARRIED_START == 0,
     weights = mlp_full[CITIZEN_START == 3 & MARRIED_START == 0]$weight2_coal) -> marriage_immig_full

felm_int_plot(modelMin = marriage_immig_min,
              modelFull = marriage_immig_full,
              ymin = -.2, ymax = .17,
              title = "Marriage to non-coethnic immigrant") -> marriage_immig_plot

# >> Learning English ####

felm(SPEAKENG_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                   BARTIK_MEAN_SQRT) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_full[SPEAKENG_START == 0 & !is.na(SPEAKENG_END) & !is.na(GROUP_COAL_PCTGROUP_START_SQRT)],
     weights = mlp_full[SPEAKENG_START == 0 & !is.na(SPEAKENG_END) & !is.na(GROUP_COAL_PCTGROUP_START_SQRT)]$weight2_coal) -> lang_min

felm(SPEAKENG_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                   BARTIK_MEAN_SQRT+
                                                   YRIMMIG_START+
                                                   MARRIED_START+
                                                   MALE+
                                                   GROUP_PCTCOUNTY_START_SQRT+
                                                   COAL_RELIANCE_START_SQRT+
                                                   RURAL_COUNTY_START+
                                                   BLACK_PCTCOUNTY_START_SQRT) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_full[SPEAKENG_START == 0 & !is.na(SPEAKENG_END) & !is.na(GROUP_COAL_PCTGROUP_START_SQRT)],
     weights = mlp_full[SPEAKENG_START == 0 & !is.na(SPEAKENG_END) & !is.na(GROUP_COAL_PCTGROUP_START_SQRT)]$weight2_coal) -> lang_full

felm_int_plot(modelMin = lang_min,
              modelFull = lang_full,
              ymin = -.36, ymax = .2,
              title = "Learning English") -> lang_plot

ggarrange(marriage_nwhite_plot,
          marriage_immig_plot,
          lang_plot,
          nrow = 1)

# > Figure G1: differentiating long-/short-term immigrants ####

# >> Long term ####
felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                      BARTIK_MEAN_SQRT) |
       STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_full[CITIZEN_START == 3 & GROUP_START != "" &
                       !is.na(GROUP_COAL_PCTGROUP_START_SQRT) &
                       ((RANGE == 10 & YRIMMIG_START < 1895) | (RANGE == 1020 & YRIMMIG_START < 1905))],
     weights = mlp_full[CITIZEN_START == 3 & GROUP_START != "" &
                          !is.na(GROUP_COAL_PCTGROUP_START_SQRT) &
                          ((RANGE == 10 & YRIMMIG_START < 1895) | (RANGE == 1020 & YRIMMIG_START < 1905))]$weight2_coal) -> long_min

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                      BARTIK_MEAN_SQRT+
                                                      YRIMMIG_START+
                                                      INCWAGE_MEDIAN_START_LN+
                                                      MARRIED_START+
                                                      GROUP_PCTCOUNTY_START_SQRT+
                                                      COAL_RELIANCE_START_SQRT+
                                                      RURAL_PCTCOUNTY_START+
                                                      BLACK_PCTCOUNTY_START_SQRT) |
       STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_full[CITIZEN_START == 3 & GROUP_START != "" &
                       !is.na(GROUP_COAL_PCTGROUP_START_SQRT) &
                       ((RANGE == 10 & YRIMMIG_START < 1895) | (RANGE == 1020 & YRIMMIG_START < 1905))],
     weights = mlp_full[CITIZEN_START == 3 & GROUP_START != "" &
                          !is.na(GROUP_COAL_PCTGROUP_START_SQRT) &
                          ((RANGE == 10 & YRIMMIG_START < 1895) | (RANGE == 1020 & YRIMMIG_START < 1905))]$weight2_coal) -> long_full

felm_int_plot(modelMin = long_min,
              modelFull = long_full,
              ymin = -0.9, ymax = 0.6,
              title = "Long-term immigrants") -> long_plot

# >> Short term ####
felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                      BARTIK_MEAN_SQRT) |
       STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_full[CITIZEN_START == 3 & GROUP_START != "" &
                       !is.na(GROUP_COAL_PCTGROUP_START_SQRT) &
                       ((RANGE == 10 & YRIMMIG_START >= 1895) | (RANGE == 1020 & YRIMMIG_START >= 1905))],
     weights = mlp_full[CITIZEN_START == 3 & GROUP_START != "" &
                          !is.na(GROUP_COAL_PCTGROUP_START_SQRT) &
                          ((RANGE == 10 & YRIMMIG_START >= 1895) | (RANGE == 1020 & YRIMMIG_START >= 1905))]$weight2_coal) -> short_min

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                      BARTIK_MEAN_SQRT+
                                                      YRIMMIG_START+
                                                      INCWAGE_MEDIAN_START_LN+
                                                      MARRIED_START+
                                                      GROUP_PCTCOUNTY_START_SQRT+
                                                      COAL_RELIANCE_START_SQRT+
                                                      RURAL_PCTCOUNTY_START+
                                                      BLACK_PCTCOUNTY_START_SQRT) |
       STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_full[CITIZEN_START == 3 & GROUP_START != "" &
                       !is.na(GROUP_COAL_PCTGROUP_START_SQRT) &
                       ((RANGE == 10 & YRIMMIG_START >= 1895) | (RANGE == 1020 & YRIMMIG_START >= 1905))],
     weights = mlp_full[CITIZEN_START == 3 & GROUP_START != "" &
                          !is.na(GROUP_COAL_PCTGROUP_START_SQRT) &
                          ((RANGE == 10 & YRIMMIG_START >= 1895) | (RANGE == 1020 & YRIMMIG_START >= 1905))]$weight2_coal) -> short_full

felm_int_plot(modelMin = short_min,
              modelFull = short_full,
              ymin = -0.9, ymax = 0.6,
              title = "Short-term immigrants") -> short_plot

ggarrange(long_plot,
          short_plot,
          ncol = 2) -> long_and_short_plots

# > Figure G2: entry of coal-adjacent immigrants into coal ####

# >> Dropping those at least 41yo (75th percentile of coal miner ages) ####

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                      BARTIK_MEAN_SQRT) |
       STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_noncoal_v2[!is.na(GROUP_COAL_PCTGROUP_START_SQRT) &
                             AGE_START >= 41],
     weights = mlp_noncoal_v2[!is.na(GROUP_COAL_PCTGROUP_START_SQRT) &
                                AGE_START >= 41]$weight2_coal) -> older_min

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                      BARTIK_MEAN_SQRT+
                                                      YRIMMIG_START+
                                                      INCWAGE_MEDIAN_START_LN+
                                                      MARRIED_START+
                                                      GROUP_PCTCOUNTY_START_SQRT+
                                                      COAL_RELIANCE_START_SQRT+
                                                      RURAL_PCTCOUNTY_START+
                                                      BLACK_PCTCOUNTY_START_SQRT) |
       STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_noncoal_v2[!is.na(GROUP_COAL_PCTGROUP_START_SQRT) &
                             AGE_START >= 41],
     weights = mlp_noncoal_v2[!is.na(GROUP_COAL_PCTGROUP_START_SQRT) &
                                AGE_START >= 41]$weight2_coal) -> older_full

felm_int_plot(modelMin = older_min,
              modelFull = older_full,
              ymin = -1.1, ymax = 0.8,
              title = "Older than 40") -> older_plot

# >> Dropping immigrants working in coal at decade end ####

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                      BARTIK_MEAN_SQRT) |
       STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_noncoal_v2[!is.na(GROUP_COAL_PCTGROUP_START_SQRT) &
                             IND1950_END != 216],
     weights = mlp_noncoal_v2[!is.na(GROUP_COAL_PCTGROUP_START_SQRT) &
                                IND1950_END != 216]$weight2_coal) -> stillNotCoal_min

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                      BARTIK_MEAN_SQRT+
                                                      YRIMMIG_START+
                                                      INCWAGE_MEDIAN_START_LN+
                                                      MARRIED_START+
                                                      GROUP_PCTCOUNTY_START_SQRT+
                                                      COAL_RELIANCE_START_SQRT+
                                                      RURAL_PCTCOUNTY_START+
                                                      BLACK_PCTCOUNTY_START_SQRT) |
       STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_noncoal_v2[!is.na(GROUP_COAL_PCTGROUP_START_SQRT) &
                             IND1950_END != 216],
     weights = mlp_noncoal_v2[!is.na(GROUP_COAL_PCTGROUP_START_SQRT) &
                                IND1950_END != 216]$weight2_coal) -> stillNotCoal_full

felm_int_plot(modelMin = stillNotCoal_min,
              modelFull = stillNotCoal_full,
              ymin = -1.1, ymax = 0.8,
              title = "Always coal−adjacent immigrants") -> stillNotCoal_plot

ggarrange(older_plot,
          stillNotCoal_plot,
          ncol = 2) -> old_and_stillNotCoal_plots

# > Table G1: leading economic conditions (placebo test) ####

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                      BARTIK_MEAN_SQRT) +
       base_decline_magnitude_LEAD_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                           BARTIK_MEAN_LEAD_SQRT) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_full,
     subset = CITIZEN_START == 3,
     weights = mlp_full[CITIZEN_START == 3]$weight2_coal) %>%
  tidy()

# > Figure G3: controlling for growth ####

# >> All ####

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT + 
                                                      BARTIK_MEAN_SQRT + 
                                                      GROUP_PCTCOUNTY_START_SQRT + 
                                                      COAL_RELIANCE_START_SQRT +
                                                      RURAL_PCTCOUNTY_START + 
                                                      BLACK_PCTCOUNTY_START_SQRT + 
                                                      YRIMMIG_START +
                                                      INCWAGE_MEDIAN_START_LN + 
                                                      MARRIED_START +
                                                      base_growth_magnitude_sqrt +
                                                      sum_prod_chng +
                                                      BARTIK_MEAN_GROWTH_SQRT +
                                                      BARTIK_MEAN_TOT) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_full,
     subset = CITIZEN_START == 3,
     weights = mlp_full[CITIZEN_START == 3]$weight2_coal) -> all_withGrowth

felm_int_plot(modelMin = total_reg01a,
              modelFull = all_withGrowth,
              title = "All",
              ymin = -.9, ymax = .42)

# >> Immigrant miners ####

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT + 
                                                      BARTIK_MEAN_SQRT + 
                                                      GROUP_PCTCOUNTY_START_SQRT + 
                                                      COAL_RELIANCE_START_SQRT +
                                                      RURAL_PCTCOUNTY_START + 
                                                      BLACK_PCTCOUNTY_START_SQRT + 
                                                      YRIMMIG_START +
                                                      INCWAGE_MEDIAN_START_LN + 
                                                      MARRIED_START +
                                                      base_growth_magnitude_sqrt +
                                                      sum_prod_chng +
                                                      BARTIK_MEAN_GROWTH_SQRT +
                                                      BARTIK_MEAN_TOT) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_coal_v2,
     subset = CITIZEN_START == 3,
     weights = mlp_coal_v2[CITIZEN_START == 3]$weight2_coal) -> coal_withGrowth

felm_int_plot(modelMin = baseline_coal_naturalized_min_reg,
              modelFull = coal_withGrowth,
              title = "Immigrant miners",
              ymin = -1.2, ymax = 0.7)

# >> Coal-adjacent immigrants ####

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT + 
                                                      BARTIK_MEAN_SQRT + 
                                                      GROUP_PCTCOUNTY_START_SQRT + 
                                                      COAL_RELIANCE_START_SQRT +
                                                      RURAL_PCTCOUNTY_START + 
                                                      BLACK_PCTCOUNTY_START_SQRT + 
                                                      YRIMMIG_START +
                                                      INCWAGE_MEDIAN_START_LN + 
                                                      MARRIED_START +
                                                      base_growth_magnitude_sqrt +
                                                      sum_prod_chng +
                                                      BARTIK_MEAN_GROWTH_SQRT +
                                                      BARTIK_MEAN_TOT) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_noncoal_v2,
     subset = CITIZEN_START == 3,
     weights = mlp_noncoal_v2[CITIZEN_START == 3]$weight2_coal) -> noncoal_withGrowth

felm_int_plot(modelMin = baseline_noncoal_naturalized_min_reg,
              modelFull = noncoal_withGrowth,
              title = "Coal-adjacent immigrants",
              ymin = -1.2, ymax = 0.7)

# > Figure G4: rail-connected group concentration and shock intensity ####

felm(NATURALIZED_END ~ base_decline_magnitude_yes_connection_sqrt*(connected_GROUP_COAL_PCTGROUP_START_SQRT + 
                                                                     BARTIK_MEAN_yes_connection_sqrt) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_full,
     subset = CITIZEN_START == 3,
     weights = mlp_full[CITIZEN_START == 3]$weight2_coal) -> rail_min

felm(NATURALIZED_END ~ base_decline_magnitude_yes_connection_sqrt*(connected_GROUP_COAL_PCTGROUP_START_SQRT + 
                                                                     BARTIK_MEAN_yes_connection_sqrt + 
                                                                     GROUP_PCTCOUNTY_START_SQRT + 
                                                                     COAL_RELIANCE_START_SQRT +
                                                                     RURAL_PCTCOUNTY_START + 
                                                                     BLACK_PCTCOUNTY_START_SQRT + 
                                                                     YRIMMIG_START +
                                                                     INCWAGE_MEDIAN_START_LN + 
                                                                     MARRIED_START) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_full,
     subset = CITIZEN_START == 3,
     weights = mlp_full[CITIZEN_START == 3]$weight2_coal) -> rail_full

felm_int_plot_for_rail_test(modelMin = rail_min,
                            modelFull = rail_full,
                            title = "Rail-connected counties",
                            ymin = -1.5, ymax = 0.5)

# > Figure G5: declarations of intention to naturalize ####

# >> All ####

felm(NATURALIZING_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                       BARTIK_MEAN_SQRT) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_full,
     subset = CITIZEN_START == 3,
     weights = mlp_full[CITIZEN_START == 3]$weight2_coal) -> all_declarations_min

felm(NATURALIZING_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT + 
                                                       BARTIK_MEAN_SQRT + 
                                                       GROUP_PCTCOUNTY_START_SQRT + 
                                                       COAL_RELIANCE_START_SQRT +
                                                       RURAL_PCTCOUNTY_START + 
                                                       BLACK_PCTCOUNTY_START_SQRT + 
                                                       YRIMMIG_START +
                                                       INCWAGE_MEDIAN_START_LN + 
                                                       MARRIED_START) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_full,
     subset = CITIZEN_START == 3,
     weights = mlp_full[CITIZEN_START == 3]$weight2_coal) -> all_declarations_full

felm_int_plot(modelMin = all_declarations_min,
              modelFull = all_declarations_full,
              ymin = -.78, ymax = .5)

# >> Immigrant miners ####

felm(NATURALIZING_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                       BARTIK_MEAN_SQRT) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_coal_v2,
     subset = CITIZEN_START == 3,
     weights = mlp_coal_v2[CITIZEN_START == 3]$weight2_coal) -> coal_declarations_min

felm(NATURALIZING_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT + 
                                                       BARTIK_MEAN_SQRT + 
                                                       GROUP_PCTCOUNTY_START_SQRT + 
                                                       COAL_RELIANCE_START_SQRT +
                                                       RURAL_PCTCOUNTY_START + 
                                                       BLACK_PCTCOUNTY_START_SQRT + 
                                                       YRIMMIG_START +
                                                       INCWAGE_MEDIAN_START_LN + 
                                                       MARRIED_START) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_coal_v2,
     subset = CITIZEN_START == 3,
     weights = mlp_coal_v2[CITIZEN_START == 3]$weight2_coal) -> coal_declarations_full

felm_int_plot(modelMin = coal_declarations_min,
              modelFull = coal_declarations_full,
              ymin = -1, ymax = .5)

# >> Coal-adjacent immigrants ####

felm(NATURALIZING_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                       BARTIK_MEAN_SQRT) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_noncoal_v2,
     subset = CITIZEN_START == 3,
     weights = mlp_noncoal_v2[CITIZEN_START == 3]$weight2_coal) -> noncoal_declarations_min

felm(NATURALIZING_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT + 
                                                       BARTIK_MEAN_SQRT + 
                                                       GROUP_PCTCOUNTY_START_SQRT + 
                                                       COAL_RELIANCE_START_SQRT +
                                                       RURAL_PCTCOUNTY_START + 
                                                       BLACK_PCTCOUNTY_START_SQRT + 
                                                       YRIMMIG_START +
                                                       INCWAGE_MEDIAN_START_LN + 
                                                       MARRIED_START) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_noncoal_v2,
     subset = CITIZEN_START == 3,
     weights = mlp_noncoal_v2[CITIZEN_START == 3]$weight2_coal) -> noncoal_declarations_full

felm_int_plot(modelMin = noncoal_declarations_min,
              modelFull = noncoal_declarations_full,
              ymin = -1.1, ymax = .7)

# > Figure G6: clustering standard errors by county ####

# >> All ####

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                      BARTIK_MEAN_SQRT) | STATEICP_RANGE + SCICP_START | 0 | SCICP_START,
     data = mlp_full,
     subset = CITIZEN_START == 3,
     weights = mlp_full[CITIZEN_START == 3]$weight2_coal) -> all_countyErrorCluster_min

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT + 
                                                      BARTIK_MEAN_SQRT + 
                                                      GROUP_PCTCOUNTY_START_SQRT + 
                                                      COAL_RELIANCE_START_SQRT +
                                                      RURAL_PCTCOUNTY_START + 
                                                      BLACK_PCTCOUNTY_START_SQRT + 
                                                      YRIMMIG_START +
                                                      INCWAGE_MEDIAN_START_LN + 
                                                      MARRIED_START) | STATEICP_RANGE + SCICP_START | 0 | SCICP_START,
     data = mlp_full,
     subset = CITIZEN_START == 3,
     weights = mlp_full[CITIZEN_START == 3]$weight2_coal) -> all_countyErrorCluster_full

felm_int_plot(modelMin = all_countyErrorCluster_min,
              modelFull = all_countyErrorCluster_full,
              ymin = -1, ymax = .5)

# >> Immigrant miners ####

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                      BARTIK_MEAN_SQRT) | STATEICP_RANGE + SCICP_START | 0 | SCICP_START,
     data = mlp_coal_v2,
     subset = CITIZEN_START == 3,
     weights = mlp_coal_v2[CITIZEN_START == 3]$weight2_coal) -> coal_countyErrorCluster_min

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT + 
                                                      BARTIK_MEAN_SQRT + 
                                                      GROUP_PCTCOUNTY_START_SQRT + 
                                                      COAL_RELIANCE_START_SQRT +
                                                      RURAL_PCTCOUNTY_START + 
                                                      BLACK_PCTCOUNTY_START_SQRT + 
                                                      YRIMMIG_START +
                                                      INCWAGE_MEDIAN_START_LN + 
                                                      MARRIED_START) | STATEICP_RANGE + SCICP_START | 0 | SCICP_START,
     data = mlp_coal_v2,
     subset = CITIZEN_START == 3,
     weights = mlp_coal_v2[CITIZEN_START == 3]$weight2_coal) -> coal_countyErrorCluster_full

felm_int_plot(modelMin = coal_countyErrorCluster_min,
              modelFull = coal_countyErrorCluster_full,
              ymin = -1.2, ymax = .6)

# >> Coal-adjacent immigrants ####

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                      BARTIK_MEAN_SQRT) | STATEICP_RANGE + SCICP_START | 0 | SCICP_START,
     data = mlp_noncoal_v2,
     subset = CITIZEN_START == 3,
     weights = mlp_noncoal_v2[CITIZEN_START == 3]$weight2_coal) -> noncoal_countyErrorCluster_min

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT + 
                                                      BARTIK_MEAN_SQRT + 
                                                      GROUP_PCTCOUNTY_START_SQRT + 
                                                      COAL_RELIANCE_START_SQRT +
                                                      RURAL_PCTCOUNTY_START + 
                                                      BLACK_PCTCOUNTY_START_SQRT + 
                                                      YRIMMIG_START +
                                                      INCWAGE_MEDIAN_START_LN + 
                                                      MARRIED_START) | STATEICP_RANGE + SCICP_START | 0 | SCICP_START,
     data = mlp_noncoal_v2,
     subset = CITIZEN_START == 3,
     weights = mlp_noncoal_v2[CITIZEN_START == 3]$weight2_coal) -> noncoal_countyErrorCluster_full

felm_int_plot(modelMin = noncoal_countyErrorCluster_min,
              modelFull = noncoal_countyErrorCluster_full,
              ymin = -1.4, ymax = .8)

# > Figure G7: binned estimator results for main tests ####

# >> All ####

interflex(data = mlp_full[CITIZEN_START == 3],
          Y = "NATURALIZED_END",
          X = "base_decline_magnitude_sqrt",
          D = "GROUP_COAL_PCTGROUP_START_SQRT",
          Z = c("BARTIK_MEAN_SQRT",
                "YRIMMIG_START",
                "INCWAGE_MEDIAN_START_LN",
                "MARRIED_START",
                "GROUP_PCTCOUNTY_START_SQRT",
                "COAL_RELIANCE_START_SQRT",
                "RURAL_PCTCOUNTY_START",
                "BLACK_PCTCOUNTY_START_SQRT"
          ),
          FE = c("STATEICP_RANGE", "SCICP_START"),
          cl = "GROUP_SCICP",
          weights = "weight2_coal",
          estimator = "binning",
          na.rm = TRUE,
          theme.bw = TRUE,
          nbins = 3,
          ylab = "Marginal effect of group concentration",
          xlab = "Shock intensity") -> all_interflex

all_interflex$figure

# >> Immigrant miners ####

interflex(data = mlp_coal_wRHS_v2[CITIZEN_START == 3],
          Y = "NATURALIZED_END",
          X = "base_decline_magnitude_sqrt",
          D = "GROUP_COAL_PCTGROUP_START_SQRT",
          Z = c("BARTIK_MEAN_SQRT",
                "YRIMMIG_START",
                "INCWAGE_MEDIAN_START_LN",
                "MARRIED_START",
                "GROUP_PCTCOUNTY_START_SQRT",
                "COAL_RELIANCE_START_SQRT",
                "RURAL_PCTCOUNTY_START",
                "BLACK_PCTCOUNTY_START_SQRT"
          ),
          FE = c("STATEICP_RANGE", "SCICP_START"),
          cl = "GROUP_SCICP",
          weights = "weight2_coal",
          estimator = "binning",
          na.rm = TRUE,
          theme.bw = TRUE,
          nbins = 5,
          ylab = "Marginal effect of group concentration",
          xlab = "Shock intensity") -> coal_interflex

coal_interflex$figure

# >> Coal-adjacent immigrants ####

interflex(data = mlp_noncoal_wRHS_v2[CITIZEN_START == 3],
          Y = "NATURALIZED_END",
          X = "base_decline_magnitude_sqrt",
          D = "GROUP_COAL_PCTGROUP_START_SQRT",
          Z = c("BARTIK_MEAN_SQRT",
                "YRIMMIG_START",
                "INCWAGE_MEDIAN_START_LN",
                "MARRIED_START",
                "GROUP_PCTCOUNTY_START_SQRT",
                "COAL_RELIANCE_START_SQRT",
                "RURAL_PCTCOUNTY_START",
                "BLACK_PCTCOUNTY_START_SQRT"
          ),
          FE = c("STATEICP_RANGE", "SCICP_START"),
          cl = "GROUP_SCICP",
          weights = "weight2_coal",
          estimator = "binning",
          na.rm = TRUE,
          theme.bw = TRUE,
          nbins = 5,
          ylab = "Marginal effect of group concentration",
          xlab = "Shock intensity") -> noncoal_interflex

noncoal_interflex$figure

# > Figure G8: iteratively dropping single counties from sample ####

scicp_list <- unique(mlp_full$SCICP_START)

scicp_drop_tib <- tibble(SCICP = scicp_list,
                         group_coef = NA,
                         group_se = NA,
                         group_p = NA,
                         group.decline_coef = NA,
                         group.decline_se = NA,
                         group.decline_p = NA)

for (i in 1:length(scicp_list)){
  felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT + 
                                                        BARTIK_MEAN_SQRT + 
                                                        GROUP_PCTCOUNTY_START_SQRT + 
                                                        COAL_RELIANCE_START_SQRT +
                                                        RURAL_PCTCOUNTY_START + 
                                                        BLACK_PCTCOUNTY_START_SQRT + 
                                                        YRIMMIG_START +
                                                        INCWAGE_MEDIAN_START_LN + 
                                                        MARRIED_START) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
       data = mlp_full,
       subset = CITIZEN_START == 3 & SCICP_START != scicp_list[i],
       weights = mlp_full[CITIZEN_START == 3 & SCICP_START != scicp_list[i]]$weight2_coal) -> run1
  tidy(run1) -> run1tidy
  
  scicp_drop_tib$group_coef[i] <- run1tidy$estimate[2]
  scicp_drop_tib$group_se[i] <- run1tidy$std.error[2]
  scicp_drop_tib$group_p[i] <- run1tidy$p.value[2]
  
  scicp_drop_tib$group.decline_coef[i] <- run1tidy$estimate[11]
  scicp_drop_tib$group.decline_se[i] <- run1tidy$std.error[11]
  scicp_drop_tib$group.decline_p[i] <- run1tidy$p.value[11]
  
  print(i)
}

scicp_drop_tib %>%
  mutate(id = 1:nrow(.)) %>%
  ggplot(aes(x = id)) +
  geom_point(aes(y = group.decline_coef), color = "blue") +
  geom_linerange(aes(ymin = group.decline_coef-1.96*group.decline_se,
                     ymax = group.decline_coef+1.96*group.decline_se), color = "blue") +
  geom_point(aes(y = group_coef), color = "red") +
  geom_linerange(aes(ymin = group_coef-1.96*group_se,
                     ymax = group_coef+1.96*group_se), color = "red") +
  geom_hline(aes(yintercept = 0), lty = 2) +
  theme_classic() +
  theme(legend.position = "none") +
  ylab("Coef.") +
  xlab("County ID")

# > Figure G9: iteratively dropping single ethnic groups from sample ####

group_list <- unique(mlp_full$GROUP_START)

group_drop_tib <- tibble(GROUP = group_list,
                         group_coef = NA,
                         group_se = NA,
                         group_p = NA,
                         group.decline_coef = NA,
                         group.decline_se = NA,
                         group.decline_p = NA)

for (i in 1:length(group_list)){
  felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT + 
                                                        BARTIK_MEAN_SQRT + 
                                                        GROUP_PCTCOUNTY_START_SQRT + 
                                                        COAL_RELIANCE_START_SQRT +
                                                        RURAL_PCTCOUNTY_START + 
                                                        BLACK_PCTCOUNTY_START_SQRT + 
                                                        YRIMMIG_START +
                                                        INCWAGE_MEDIAN_START_LN + 
                                                        MARRIED_START) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
       data = mlp_full,
       subset = CITIZEN_START == 3 & GROUP_START != group_list[i],
       weights = mlp_full[CITIZEN_START == 3 & GROUP_START != group_list[i]]$weight2_coal) -> run2
  tidy(run2) -> run2tidy
  
  group_drop_tib$group_coef[i] <- run2tidy$estimate[2]
  group_drop_tib$group_se[i] <- run2tidy$std.error[2]
  group_drop_tib$group_p[i] <- run2tidy$p.value[2]
  
  group_drop_tib$group.decline_coef[i] <- run2tidy$estimate[11]
  group_drop_tib$group.decline_se[i] <- run2tidy$std.error[11]
  group_drop_tib$group.decline_p[i] <- run2tidy$p.value[11]
  
  print(i)
}

group_drop_tib %>%
  mutate(id = 1:nrow(.)) %>%
  ggplot(aes(x = id)) +
  geom_point(aes(y = group.decline_coef), color = "blue") +
  geom_linerange(aes(ymin = group.decline_coef-1.96*group.decline_se,
                     ymax = group.decline_coef+1.96*group.decline_se), color = "blue") +
  geom_point(aes(y = group_coef), color = "red") +
  geom_linerange(aes(ymin = group_coef-1.96*group_se,
                     ymax = group_coef+1.96*group_se), color = "red") +
  geom_hline(aes(yintercept = 0), lty = 2) +
  theme_classic() +
  theme(legend.position = "none") +
  ylab("Coef.") +
  xlab("Group ID")

# > Figure G10: iteratively dropping random 1% subsamples from sample ####

obser_drop_tib <- tibble(OBSER = 1:500,
                         group_coef = NA,
                         group_se = NA,
                         group_p = NA,
                         group.decline_coef = NA,
                         group.decline_se = NA,
                         group.decline_p = NA)


for (i in 1:500){
  
  mlp_temp <- sample_frac(mlp_full[CITIZEN_START == 3], size = 99/100)
  
  felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT + 
                                                        BARTIK_MEAN_SQRT + 
                                                        GROUP_PCTCOUNTY_START_SQRT + 
                                                        COAL_RELIANCE_START_SQRT +
                                                        RURAL_PCTCOUNTY_START + 
                                                        BLACK_PCTCOUNTY_START_SQRT + 
                                                        YRIMMIG_START +
                                                        INCWAGE_MEDIAN_START_LN + 
                                                        MARRIED_START) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
       data = mlp_temp,
       weights = mlp_temp$weight2_coal) -> run3
  tidy(run3) -> run3tidy
  
  obser_drop_tib$group_coef[i] <- run3tidy$estimate[2]
  obser_drop_tib$group_se[i] <- run3tidy$std.error[2]
  obser_drop_tib$group_p[i] <- run3tidy$p.value[2]
  
  obser_drop_tib$group.decline_coef[i] <- run3tidy$estimate[11]
  obser_drop_tib$group.decline_se[i] <- run3tidy$std.error[11]
  obser_drop_tib$group.decline_p[i] <- run3tidy$p.value[11]
  
  print(i)
}

obser_drop_tib %>%
  mutate(id = 1:nrow(.)) %>%
  ggplot(aes(x = id)) +
  geom_point(aes(y = group.decline_coef), color = "blue") +
  geom_linerange(aes(ymin = group.decline_coef-1.96*group.decline_se,
                     ymax = group.decline_coef+1.96*group.decline_se), color = "blue") +
  geom_point(aes(y = group_coef), color = "red") +
  geom_linerange(aes(ymin = group_coef-1.96*group_se,
                     ymax = group_coef+1.96*group_se), color = "red") +
  geom_hline(aes(yintercept = 0), lty = 2) +
  theme_classic() +
  theme(legend.position = "none") +
  ylab("Coef.") +
  xlab("Iteration Number")

# > Figure G11: county-census fixed effects ####

# >> All ####

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                      BARTIK_MEAN_SQRT) | RANGE_SCICP | 0 | SCICP_START,
     data = mlp_full,
     subset = CITIZEN_START == 3,
     weights = mlp_full[CITIZEN_START == 3]$weight2_coal) -> all_countyCensusFE_min

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT + 
                                                      BARTIK_MEAN_SQRT + 
                                                      GROUP_PCTCOUNTY_START_SQRT + 
                                                      COAL_RELIANCE_START_SQRT +
                                                      RURAL_PCTCOUNTY_START + 
                                                      BLACK_PCTCOUNTY_START_SQRT + 
                                                      YRIMMIG_START +
                                                      INCWAGE_MEDIAN_START_LN + 
                                                      MARRIED_START) | RANGE_SCICP | 0 | SCICP_START,
     data = mlp_full,
     subset = CITIZEN_START == 3,
     weights = mlp_full[CITIZEN_START == 3]$weight2_coal) -> all_countyCensusFE_full

felm_int_plot(modelMin = all_countyCensusFE_min,
              modelFull = all_countyCensusFE_full,
              title = "All",
              ymin = -1.25, ymax = .5) -> all_countyCensusFE_plot

# >> Immigrant miners ####

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                      BARTIK_MEAN_SQRT) | RANGE_SCICP | 0 | SCICP_START,
     data = mlp_coal_v2,
     subset = CITIZEN_START == 3,
     weights = mlp_coal_v2[CITIZEN_START == 3]$weight2_coal) -> coal_countyCensusFE_min

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT + 
                                                      BARTIK_MEAN_SQRT + 
                                                      GROUP_PCTCOUNTY_START_SQRT + 
                                                      COAL_RELIANCE_START_SQRT +
                                                      RURAL_PCTCOUNTY_START + 
                                                      BLACK_PCTCOUNTY_START_SQRT + 
                                                      YRIMMIG_START +
                                                      INCWAGE_MEDIAN_START_LN + 
                                                      MARRIED_START) | RANGE_SCICP | 0 | SCICP_START,
     data = mlp_coal_v2,
     subset = CITIZEN_START == 3,
     weights = mlp_coal_v2[CITIZEN_START == 3]$weight2_coal) -> coal_countyCensusFE_full

felm_int_plot(modelMin = coal_countyCensusFE_min,
              modelFull = coal_countyCensusFE_full,
              title = "Immigrant miners",
              ymin = -1.5, ymax = .75) -> coal_countyCensusFE_plot

# >> Coal-adjacent immigrants ####

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                      BARTIK_MEAN_SQRT) | RANGE_SCICP | 0 | SCICP_START,
     data = mlp_noncoal_v2,
     subset = CITIZEN_START == 3,
     weights = mlp_noncoal_v2[CITIZEN_START == 3]$weight2_coal) -> noncoal_countyCensusFE_min

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT + 
                                                      BARTIK_MEAN_SQRT + 
                                                      GROUP_PCTCOUNTY_START_SQRT + 
                                                      COAL_RELIANCE_START_SQRT +
                                                      RURAL_PCTCOUNTY_START + 
                                                      BLACK_PCTCOUNTY_START_SQRT + 
                                                      YRIMMIG_START +
                                                      INCWAGE_MEDIAN_START_LN + 
                                                      MARRIED_START) | RANGE_SCICP | 0 | SCICP_START,
     data = mlp_noncoal_v2,
     subset = CITIZEN_START == 3,
     weights = mlp_noncoal_v2[CITIZEN_START == 3]$weight2_coal) -> noncoal_countyCensusFE_full

felm_int_plot(modelMin = noncoal_countyCensusFE_min,
              modelFull = noncoal_countyCensusFE_full,
              title = "Coal-adjacent immigrants",
              ymin = -1.75, ymax = 1) -> noncoal_countyCensusFE_plot

ggarrange(all_countyCensusFE_plot,
          coal_countyCensusFE_plot,
          noncoal_countyCensusFE_plot,
          nrow = 1)

# > Figure G12: excluding major strike years ####

# >> All ####

felm(NATURALIZED_END ~ base_decline_magnitude_noStrikes_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                                BARTIK_MEAN_SQRT) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_full,
     subset = CITIZEN_START == 3,
     weights = mlp_full[CITIZEN_START == 3]$weight2_coal) -> noStrikes_reg01a

felm(NATURALIZED_END ~ base_decline_magnitude_noStrikes_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT + 
                                                                BARTIK_MEAN_SQRT + 
                                                                GROUP_PCTCOUNTY_START_SQRT + 
                                                                COAL_RELIANCE_START_SQRT +
                                                                RURAL_PCTCOUNTY_START + 
                                                                BLACK_PCTCOUNTY_START_SQRT + 
                                                                YRIMMIG_START +
                                                                INCWAGE_MEDIAN_START_LN + 
                                                                MARRIED_START) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_full,
     subset = CITIZEN_START == 3,
     weights = mlp_full[CITIZEN_START == 3]$weight2_coal) -> noStrikes_reg01b

felm_int_plot_for_noStrikes(modelMin = noStrikes_reg01a,
                            modelFull = noStrikes_reg01b,
                            title = "All",
                            ymin = -.8, ymax = .5)

# >> Immigrant miners ####

felm(NATURALIZED_END ~ base_decline_magnitude_noStrikes_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                                BARTIK_MEAN_SQRT) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_coal_v2,
     weights = mlp_coal_v2$weight2_coal) -> noStrikes_coal_naturalized_min_reg

felm(NATURALIZED_END ~ base_decline_magnitude_noStrikes_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT +
                                                                BARTIK_MEAN_SQRT + GROUP_PCTCOUNTY_START_SQRT + COAL_RELIANCE_START_SQRT +
                                                                RURAL_PCTCOUNTY_START + BLACK_PCTCOUNTY_START_SQRT + YRIMMIG_START +
                                                                INCWAGE_MEDIAN_START_LN + MARRIED_START) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_coal_v2,
     weights = mlp_coal_v2$weight2_coal) -> noStrikes_coal_naturalized_full_reg

felm_int_plot_for_noStrikes(modelMin = noStrikes_coal_naturalized_min_reg,
                            modelFull = noStrikes_coal_naturalized_full_reg,
                            title = "Immigrant miners",
                            ymin = -1, ymax = .5)

# >> Coal-adjacent immigrants ####

felm(NATURALIZED_END ~ base_decline_magnitude_noStrikes_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                                BARTIK_MEAN_SQRT) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_noncoal_v2,
     weights = mlp_noncoal_v2$weight2_coal) -> noStrikes_noncoal_naturalized_min_reg

felm(NATURALIZED_END ~ base_decline_magnitude_noStrikes_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT +
                                                                BARTIK_MEAN_SQRT + GROUP_PCTCOUNTY_START_SQRT + COAL_RELIANCE_START_SQRT +
                                                                RURAL_PCTCOUNTY_START + BLACK_PCTCOUNTY_START_SQRT + YRIMMIG_START +
                                                                INCWAGE_MEDIAN_START_LN + MARRIED_START) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_noncoal_v2,
     weights = mlp_noncoal_v2$weight2_coal) -> noStrikes_noncoal_naturalized_full_reg

felm_int_plot_for_noStrikes(modelMin = noStrikes_noncoal_naturalized_min_reg,
                            modelFull = noStrikes_noncoal_naturalized_full_reg,
                            title = "Coal-adjacent immigrants",
                            ymin = -1, ymax = .7)

# > Figure G13: excluding states with non-citizen suffrage ####

# >> All ####

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                      BARTIK_MEAN_SQRT) | RANGE_SCICP | 0 | GROUP_SCICP,
     data = mlp_full,
     subset = CITIZEN_START == 3 & noncitizen_suffrage == 0,
     weights = mlp_full[CITIZEN_START == 3 & noncitizen_suffrage == 0]$weight2_coal) -> noImmigSuff_reg01a

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT + 
                                                      BARTIK_MEAN_SQRT + 
                                                      GROUP_PCTCOUNTY_START_SQRT + 
                                                      COAL_RELIANCE_START_SQRT +
                                                      RURAL_PCTCOUNTY_START + 
                                                      BLACK_PCTCOUNTY_START_SQRT + 
                                                      YRIMMIG_START +
                                                      INCWAGE_MEDIAN_START_LN + 
                                                      MARRIED_START) | RANGE_SCICP | 0 | GROUP_SCICP,
     data = mlp_full,
     subset = CITIZEN_START == 3 & noncitizen_suffrage == 0,
     weights = mlp_full[CITIZEN_START == 3 & noncitizen_suffrage == 0]$weight2_coal) -> noImmigSuff_reg01b

felm_int_plot(modelMin = noImmigSuff_reg01a,
              modelFull = noImmigSuff_reg01b,
              title = "All",
              ymin = -1.2, ymax = .6)

# >> Immigrant miners ####

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                      BARTIK_MEAN_SQRT) | RANGE_SCICP | 0 | GROUP_SCICP,
     data = mlp_coal_v2[noncitizen_suffrage == 0],
     weights = mlp_coal_v2[noncitizen_suffrage == 0]$weight2_coal) -> noImmigSuff_coal_naturalized_min_reg

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT +
                                                      BARTIK_MEAN_SQRT + GROUP_PCTCOUNTY_START_SQRT + COAL_RELIANCE_START_SQRT +
                                                      RURAL_PCTCOUNTY_START + BLACK_PCTCOUNTY_START_SQRT + YRIMMIG_START +
                                                      INCWAGE_MEDIAN_START_LN + MARRIED_START) | RANGE_SCICP | 0 | GROUP_SCICP,
     data = mlp_coal_v2[noncitizen_suffrage == 0],
     weights = mlp_coal_v2[noncitizen_suffrage == 0]$weight2_coal) -> noImmigSuff_coal_naturalized_full_reg

felm_int_plot(modelMin = noImmigSuff_coal_naturalized_min_reg,
              modelFull = noImmigSuff_coal_naturalized_full_reg,
              title = "Immigrant miners",
              ymin = -1.5, ymax = .8)

# >> Coal-adjacent immigrants ####

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                      BARTIK_MEAN_SQRT) | RANGE_SCICP | 0 | GROUP_SCICP,
     data = mlp_noncoal_v2[noncitizen_suffrage == 0],
     weights = mlp_noncoal_v2[noncitizen_suffrage == 0]$weight2_coal) -> noImmigSuff_noncoal_naturalized_min_reg

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT +
                                                      BARTIK_MEAN_SQRT + GROUP_PCTCOUNTY_START_SQRT + COAL_RELIANCE_START_SQRT +
                                                      RURAL_PCTCOUNTY_START + BLACK_PCTCOUNTY_START_SQRT + YRIMMIG_START +
                                                      INCWAGE_MEDIAN_START_LN + MARRIED_START) | RANGE_SCICP | 0 | GROUP_SCICP,
     data = mlp_noncoal_v2[noncitizen_suffrage == 0],
     weights = mlp_noncoal_v2[noncitizen_suffrage == 0]$weight2_coal) -> noImmigSuff_noncoal_naturalized_full_reg

felm_int_plot(modelMin = noImmigSuff_noncoal_naturalized_min_reg,
              modelFull = noImmigSuff_noncoal_naturalized_full_reg,
              title = "Coal-adjacent immigrants",
              ymin = -1.5, ymax = .8)

# > Figure G14: including only states with non-citizen suffrage ####

# >> All ####

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                      BARTIK_MEAN_SQRT) | RANGE_SCICP | 0 | GROUP_SCICP,
     data = mlp_full,
     subset = CITIZEN_START == 3 & noncitizen_suffrage == 1,
     weights = mlp_full[CITIZEN_START == 3 & noncitizen_suffrage == 1]$weight2_coal) -> yesImmigSuff_reg01a

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT + 
                                                      BARTIK_MEAN_SQRT + 
                                                      GROUP_PCTCOUNTY_START_SQRT + 
                                                      COAL_RELIANCE_START_SQRT +
                                                      RURAL_PCTCOUNTY_START + 
                                                      BLACK_PCTCOUNTY_START_SQRT + 
                                                      YRIMMIG_START +
                                                      INCWAGE_MEDIAN_START_LN + 
                                                      MARRIED_START) | RANGE_SCICP | 0 | GROUP_SCICP,
     data = mlp_full,
     subset = CITIZEN_START == 3 & noncitizen_suffrage == 1,
     weights = mlp_full[CITIZEN_START == 3 & noncitizen_suffrage == 1]$weight2_coal) -> yesImmigSuff_reg01b

felm_int_plot(modelMin = yesImmigSuff_reg01a,
              modelFull = yesImmigSuff_reg01b,
              title = "All",
              ymin = -1.2, ymax = .6)

# >> Immigrant miners ####

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                      BARTIK_MEAN_SQRT) | RANGE_SCICP | 0 | GROUP_SCICP,
     data = mlp_coal_v2[noncitizen_suffrage == 1],
     weights = mlp_coal_v2[noncitizen_suffrage == 1]$weight2_coal) -> yesImmigSuff_coal_naturalized_min_reg

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT +
                                                      BARTIK_MEAN_SQRT + GROUP_PCTCOUNTY_START_SQRT + COAL_RELIANCE_START_SQRT +
                                                      RURAL_PCTCOUNTY_START + BLACK_PCTCOUNTY_START_SQRT + YRIMMIG_START +
                                                      INCWAGE_MEDIAN_START_LN + MARRIED_START) | RANGE_SCICP | 0 | GROUP_SCICP,
     data = mlp_coal_v2[noncitizen_suffrage == 1],
     weights = mlp_coal_v2[noncitizen_suffrage == 1]$weight2_coal) -> yesImmigSuff_coal_naturalized_full_reg

felm_int_plot(modelMin = yesImmigSuff_coal_naturalized_min_reg,
              modelFull = yesImmigSuff_coal_naturalized_full_reg,
              title = "Immigrant miners",
              ymin = -1.5, ymax = .8)

# >> Coal-adjacent immigrants ####

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                      BARTIK_MEAN_SQRT) | RANGE_SCICP | 0 | GROUP_SCICP,
     data = mlp_noncoal_v2[noncitizen_suffrage == 1],
     weights = mlp_noncoal_v2[noncitizen_suffrage == 1]$weight2_coal) -> yesImmigSuff_noncoal_naturalized_min_reg

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT +
                                                      BARTIK_MEAN_SQRT + GROUP_PCTCOUNTY_START_SQRT + COAL_RELIANCE_START_SQRT +
                                                      RURAL_PCTCOUNTY_START + BLACK_PCTCOUNTY_START_SQRT + YRIMMIG_START +
                                                      INCWAGE_MEDIAN_START_LN + MARRIED_START) | RANGE_SCICP | 0 | GROUP_SCICP,
     data = mlp_noncoal_v2[noncitizen_suffrage == 1],
     weights = mlp_noncoal_v2[noncitizen_suffrage == 1]$weight2_coal) -> yesImmigSuff_noncoal_naturalized_full_reg

felm_int_plot(modelMin = yesImmigSuff_noncoal_naturalized_min_reg,
              modelFull = yesImmigSuff_noncoal_naturalized_full_reg,
              title = "Coal-adjacent immigrants",
              ymin = -1.5, ymax = 1.1)

# > Table G2: naturalization ~ group concentration ####

felm(NATURALIZED_END ~ GROUP_COAL_PCTGROUP_START_SQRT | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_full,
     subset = CITIZEN_START == 3,
     weights = mlp_full[CITIZEN_START == 3]$weight2_coal) %>%
  tidy()

# > Figure G15 (left panel): unweighted model ####

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT+
                                                      BARTIK_MEAN_SQRT) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_full,
     subset = CITIZEN_START == 3) -> all_unweighted_min

felm(NATURALIZED_END ~ base_decline_magnitude_sqrt*(GROUP_COAL_PCTGROUP_START_SQRT + 
                                                      BARTIK_MEAN_SQRT + 
                                                      GROUP_PCTCOUNTY_START_SQRT + 
                                                      COAL_RELIANCE_START_SQRT +
                                                      RURAL_PCTCOUNTY_START + 
                                                      BLACK_PCTCOUNTY_START_SQRT + 
                                                      YRIMMIG_START +
                                                      INCWAGE_MEDIAN_START_LN + 
                                                      MARRIED_START) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_full,
     subset = CITIZEN_START == 3) -> all_unweighted_full

felm_int_plot(modelMin = all_unweighted_min,
              modelFull = all_unweighted_full,
              title = "All",
              ymin = -0.85, ymax = 0.5)

# > Figure G15 (right panel): logarithmic transformations ####

felm(NATURALIZED_END ~ base_decline_magnitude_ln*(GROUP_COAL_PCTGROUP_START_LN+
                                                    BARTIK_MEAN_LN) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_full,
     subset = CITIZEN_START == 3) -> all_ln_min

felm(NATURALIZED_END ~ base_decline_magnitude_ln*(GROUP_COAL_PCTGROUP_START_LN+
                                                    BARTIK_MEAN_LN + 
                                                    GROUP_PCTCOUNTY_START_LN + 
                                                    COAL_RELIANCE_START_LN +
                                                    RURAL_PCTCOUNTY_START + 
                                                    BLACK_PCTCOUNTY_START_LN + 
                                                    YRIMMIG_START +
                                                    INCWAGE_MEDIAN_START_LN + 
                                                    MARRIED_START) | STATEICP_RANGE + SCICP_START | 0 | GROUP_SCICP,
     data = mlp_full,
     subset = CITIZEN_START == 3) -> all_ln_full

felm_int_plot_for_ln(modelMin = all_ln_min,
                     modelFull = all_ln_full,
                     title = "All",
                     ymin = -0.85, ymax = 0.5)
